Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull locking updates from Ingo Molnar:
 "The main changes in this cycle were:

   - a big round of FUTEX_UNLOCK_PI improvements, fixes, cleanups and
     general restructuring

   - lockdep updates such as new checks for lock_downgrade()

   - introduce the new atomic_try_cmpxchg() locking API and use it to
     optimize refcount code generation

   - ... plus misc fixes, updates and cleanups"
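
To illustrate the third item above: atomic_try_cmpxchg() returns true when the exchange succeeds and, on failure, writes the value it found back into the caller's 'old' variable, so compare-and-swap loops no longer need a separate re-read. A minimal sketch of the pattern (my_inc_not_zero() is an invented helper, not the actual refcount_t code):

#include <linux/atomic.h>

/* Increment v unless it is zero, written in the try_cmpxchg style. */
static bool my_inc_not_zero(atomic_t *v)
{
	int old = atomic_read(v);

	do {
		if (!old)
			return false;
		/*
		 * On failure atomic_try_cmpxchg() has already refreshed
		 * 'old' with the current value, so we simply loop.
		 */
	} while (!atomic_try_cmpxchg(v, &old, old + 1));

	return true;
}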

* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (38 commits)
  MAINTAINERS: Add FUTEX SUBSYSTEM
  futex: Clarify mark_wake_futex memory barrier usage
  futex: Fix small (and harmless looking) inconsistencies
  futex: Avoid freeing an active timer
  rtmutex: Plug preempt count leak in rt_mutex_futex_unlock()
  rtmutex: Fix more prio comparisons
  rtmutex: Fix PI chain order integrity
  sched,tracing: Update trace_sched_pi_setprio()
  sched/rtmutex: Refactor rt_mutex_setprio()
  rtmutex: Clean up
  sched/deadline/rtmutex: Dont miss the dl_runtime/dl_period update
  sched/rtmutex/deadline: Fix a PI crash for deadline tasks
  rtmutex: Deboost before waking up the top waiter
  locking/ww-mutex: Limit stress test to 2 seconds
  locking/atomic: Fix atomic_try_cmpxchg() semantics
  lockdep: Fix per-cpu static objects
  futex: Drop hb->lock before enqueueing on the rtmutex
  futex: Futex_unlock_pi() determinism
  futex: Rework futex_lock_pi() to use rt_mutex_*_proxy_lock()
  futex,rt_mutex: Restructure rt_mutex_finish_proxy_lock()
  ...
This commit is contained in:

Linus Torvalds
2017-05-01 19:36:00 -07:00
29 changed files with 1234 additions and 613 deletions

View File

@@ -660,6 +660,7 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
struct lockdep_subclass_key *key;
struct hlist_head *hash_head;
struct lock_class *class;
bool is_static = false;
if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) {
debug_locks_off();
@@ -673,10 +674,23 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
/*
* Static locks do not have their class-keys yet - for them the key
* is the lock object itself:
* is the lock object itself. If the lock is in the per cpu area,
* the canonical address of the lock (per cpu offset removed) is
* used.
*/
if (unlikely(!lock->key))
lock->key = (void *)lock;
if (unlikely(!lock->key)) {
unsigned long can_addr, addr = (unsigned long)lock;
if (__is_kernel_percpu_address(addr, &can_addr))
lock->key = (void *)can_addr;
else if (__is_module_percpu_address(addr, &can_addr))
lock->key = (void *)can_addr;
else if (static_obj(lock))
lock->key = (void *)lock;
else
return ERR_PTR(-EINVAL);
is_static = true;
}
/*
* NOTE: the class-key must be unique. For dynamic locks, a static
@@ -708,7 +722,7 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
}
}
return NULL;
return is_static || static_obj(lock->key) ? NULL : ERR_PTR(-EINVAL);
}
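
The per-cpu handling above exists because a statically defined per-cpu lock has a different address on every CPU, yet all of those copies should map to a single lock class. A hedged illustration (my_lock and show_percpu_addresses() are made-up names, not part of this patch):

#include <linux/cpumask.h>
#include <linux/percpu.h>
#include <linux/printk.h>
#include <linux/spinlock.h>

static DEFINE_PER_CPU(spinlock_t, my_lock);

static void show_percpu_addresses(void)
{
	int cpu;

	/*
	 * Each CPU's copy of my_lock lives at a different address, so the
	 * raw address cannot serve as the class key. look_up_lock_class()
	 * therefore strips the per-cpu offset and keys on the canonical
	 * address, collapsing all copies into one class.
	 */
	for_each_possible_cpu(cpu)
		pr_info("cpu%d: my_lock at %p\n", cpu, per_cpu_ptr(&my_lock, cpu));
}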
/*
@@ -726,19 +740,18 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
DEBUG_LOCKS_WARN_ON(!irqs_disabled());
class = look_up_lock_class(lock, subclass);
if (likely(class))
if (likely(!IS_ERR_OR_NULL(class)))
goto out_set_class_cache;
/*
* Debug-check: all keys must be persistent!
*/
if (!static_obj(lock->key)) {
*/
if (IS_ERR(class)) {
debug_locks_off();
printk("INFO: trying to register non-static key.\n");
printk("the code is fine but needs lockdep annotation.\n");
printk("turning off the locking correctness validator.\n");
dump_stack();
return NULL;
}
@@ -3419,7 +3432,7 @@ static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock)
* Clearly if the lock hasn't been acquired _ever_, we're not
* holding it either, so report failure.
*/
if (!class)
if (IS_ERR_OR_NULL(class))
return 0;
/*
@@ -3437,13 +3450,67 @@ static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock)
return 0;
}
/* @depth must not be zero */
static struct held_lock *find_held_lock(struct task_struct *curr,
struct lockdep_map *lock,
unsigned int depth, int *idx)
{
struct held_lock *ret, *hlock, *prev_hlock;
int i;
i = depth - 1;
hlock = curr->held_locks + i;
ret = hlock;
if (match_held_lock(hlock, lock))
goto out;
ret = NULL;
for (i--, prev_hlock = hlock--;
i >= 0;
i--, prev_hlock = hlock--) {
/*
* We must not cross into another context:
*/
if (prev_hlock->irq_context != hlock->irq_context) {
ret = NULL;
break;
}
if (match_held_lock(hlock, lock)) {
ret = hlock;
break;
}
}
out:
*idx = i;
return ret;
}
static int reacquire_held_locks(struct task_struct *curr, unsigned int depth,
int idx)
{
struct held_lock *hlock;
for (hlock = curr->held_locks + idx; idx < depth; idx++, hlock++) {
if (!__lock_acquire(hlock->instance,
hlock_class(hlock)->subclass,
hlock->trylock,
hlock->read, hlock->check,
hlock->hardirqs_off,
hlock->nest_lock, hlock->acquire_ip,
hlock->references, hlock->pin_count))
return 1;
}
return 0;
}
static int
__lock_set_class(struct lockdep_map *lock, const char *name,
struct lock_class_key *key, unsigned int subclass,
unsigned long ip)
{
struct task_struct *curr = current;
struct held_lock *hlock, *prev_hlock;
struct held_lock *hlock;
struct lock_class *class;
unsigned int depth;
int i;
@@ -3456,21 +3523,10 @@ __lock_set_class(struct lockdep_map *lock, const char *name,
if (DEBUG_LOCKS_WARN_ON(!depth))
return 0;
prev_hlock = NULL;
for (i = depth-1; i >= 0; i--) {
hlock = curr->held_locks + i;
/*
* We must not cross into another context:
*/
if (prev_hlock && prev_hlock->irq_context != hlock->irq_context)
break;
if (match_held_lock(hlock, lock))
goto found_it;
prev_hlock = hlock;
}
return print_unlock_imbalance_bug(curr, lock, ip);
hlock = find_held_lock(curr, lock, depth, &i);
if (!hlock)
return print_unlock_imbalance_bug(curr, lock, ip);
found_it:
lockdep_init_map(lock, name, key, 0);
class = register_lock_class(lock, subclass, 0);
hlock->class_idx = class - lock_classes + 1;
@@ -3478,15 +3534,46 @@ found_it:
curr->lockdep_depth = i;
curr->curr_chain_key = hlock->prev_chain_key;
for (; i < depth; i++) {
hlock = curr->held_locks + i;
if (!__lock_acquire(hlock->instance,
hlock_class(hlock)->subclass, hlock->trylock,
hlock->read, hlock->check, hlock->hardirqs_off,
hlock->nest_lock, hlock->acquire_ip,
hlock->references, hlock->pin_count))
return 0;
}
if (reacquire_held_locks(curr, depth, i))
return 0;
/*
* I took it apart and put it back together again, except now I have
* these 'spare' parts.. where shall I put them.
*/
if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth))
return 0;
return 1;
}
static int __lock_downgrade(struct lockdep_map *lock, unsigned long ip)
{
struct task_struct *curr = current;
struct held_lock *hlock;
unsigned int depth;
int i;
depth = curr->lockdep_depth;
/*
* This function is about (re)setting the class of a held lock,
* yet we're not actually holding any locks. Naughty user!
*/
if (DEBUG_LOCKS_WARN_ON(!depth))
return 0;
hlock = find_held_lock(curr, lock, depth, &i);
if (!hlock)
return print_unlock_imbalance_bug(curr, lock, ip);
curr->lockdep_depth = i;
curr->curr_chain_key = hlock->prev_chain_key;
WARN(hlock->read, "downgrading a read lock");
hlock->read = 1;
hlock->acquire_ip = ip;
if (reacquire_held_locks(curr, depth, i))
return 0;
/*
* I took it apart and put it back together again, except now I have
@@ -3508,7 +3595,7 @@ static int
__lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
{
struct task_struct *curr = current;
struct held_lock *hlock, *prev_hlock;
struct held_lock *hlock;
unsigned int depth;
int i;
@@ -3527,21 +3614,10 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
* Check whether the lock exists in the current stack
* of held locks:
*/
prev_hlock = NULL;
for (i = depth-1; i >= 0; i--) {
hlock = curr->held_locks + i;
/*
* We must not cross into another context:
*/
if (prev_hlock && prev_hlock->irq_context != hlock->irq_context)
break;
if (match_held_lock(hlock, lock))
goto found_it;
prev_hlock = hlock;
}
return print_unlock_imbalance_bug(curr, lock, ip);
hlock = find_held_lock(curr, lock, depth, &i);
if (!hlock)
return print_unlock_imbalance_bug(curr, lock, ip);
found_it:
if (hlock->instance == lock)
lock_release_holdtime(hlock);
@@ -3568,15 +3644,8 @@ found_it:
curr->lockdep_depth = i;
curr->curr_chain_key = hlock->prev_chain_key;
for (i++; i < depth; i++) {
hlock = curr->held_locks + i;
if (!__lock_acquire(hlock->instance,
hlock_class(hlock)->subclass, hlock->trylock,
hlock->read, hlock->check, hlock->hardirqs_off,
hlock->nest_lock, hlock->acquire_ip,
hlock->references, hlock->pin_count))
return 0;
}
if (reacquire_held_locks(curr, depth, i + 1))
return 0;
/*
* We had N bottles of beer on the wall, we drank one, but now
@@ -3741,6 +3810,23 @@ void lock_set_class(struct lockdep_map *lock, const char *name,
}
EXPORT_SYMBOL_GPL(lock_set_class);
void lock_downgrade(struct lockdep_map *lock, unsigned long ip)
{
unsigned long flags;
if (unlikely(current->lockdep_recursion))
return;
raw_local_irq_save(flags);
current->lockdep_recursion = 1;
check_flags(flags);
if (__lock_downgrade(lock, ip))
check_chain_key(current);
current->lockdep_recursion = 0;
raw_local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(lock_downgrade);
/*
* We are not always called with irqs disabled - do that here,
* and also avoid lockdep recursion:
@@ -3903,7 +3989,7 @@ static void
__lock_contended(struct lockdep_map *lock, unsigned long ip)
{
struct task_struct *curr = current;
struct held_lock *hlock, *prev_hlock;
struct held_lock *hlock;
struct lock_class_stats *stats;
unsigned int depth;
int i, contention_point, contending_point;
@@ -3916,22 +4002,12 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)
if (DEBUG_LOCKS_WARN_ON(!depth))
return;
prev_hlock = NULL;
for (i = depth-1; i >= 0; i--) {
hlock = curr->held_locks + i;
/*
* We must not cross into another context:
*/
if (prev_hlock && prev_hlock->irq_context != hlock->irq_context)
break;
if (match_held_lock(hlock, lock))
goto found_it;
prev_hlock = hlock;
hlock = find_held_lock(curr, lock, depth, &i);
if (!hlock) {
print_lock_contention_bug(curr, lock, ip);
return;
}
print_lock_contention_bug(curr, lock, ip);
return;
found_it:
if (hlock->instance != lock)
return;
@@ -3955,7 +4031,7 @@ static void
__lock_acquired(struct lockdep_map *lock, unsigned long ip)
{
struct task_struct *curr = current;
struct held_lock *hlock, *prev_hlock;
struct held_lock *hlock;
struct lock_class_stats *stats;
unsigned int depth;
u64 now, waittime = 0;
@@ -3969,22 +4045,12 @@ __lock_acquired(struct lockdep_map *lock, unsigned long ip)
if (DEBUG_LOCKS_WARN_ON(!depth))
return;
prev_hlock = NULL;
for (i = depth-1; i >= 0; i--) {
hlock = curr->held_locks + i;
/*
* We must not cross into another context:
*/
if (prev_hlock && prev_hlock->irq_context != hlock->irq_context)
break;
if (match_held_lock(hlock, lock))
goto found_it;
prev_hlock = hlock;
hlock = find_held_lock(curr, lock, depth, &i);
if (!hlock) {
print_lock_contention_bug(curr, lock, _RET_IP_);
return;
}
print_lock_contention_bug(curr, lock, _RET_IP_);
return;
found_it:
if (hlock->instance != lock)
return;
@@ -4172,7 +4238,7 @@ void lockdep_reset_lock(struct lockdep_map *lock)
* If the class exists we look it up and zap it:
*/
class = look_up_lock_class(lock, j);
if (class)
if (!IS_ERR_OR_NULL(class))
zap_class(class);
}
/*

View File

@@ -174,12 +174,3 @@ void debug_rt_mutex_init(struct rt_mutex *lock, const char *name)
lock->name = name;
}
void
rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task)
{
}
void rt_mutex_deadlock_account_unlock(struct task_struct *task)
{
}

View File

@@ -9,9 +9,6 @@
* This file contains macros used solely by rtmutex.c. Debug version.
*/
extern void
rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task);
extern void rt_mutex_deadlock_account_unlock(struct task_struct *task);
extern void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
extern void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter);
extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name);

View File

@@ -224,6 +224,12 @@ static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
}
#endif
/*
* Only use with rt_mutex_waiter_{less,equal}()
*/
#define task_to_waiter(p) \
&(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline }
static inline int
rt_mutex_waiter_less(struct rt_mutex_waiter *left,
struct rt_mutex_waiter *right)
@@ -238,12 +244,30 @@ rt_mutex_waiter_less(struct rt_mutex_waiter *left,
* then right waiter has a dl_prio() too.
*/
if (dl_prio(left->prio))
return dl_time_before(left->task->dl.deadline,
right->task->dl.deadline);
return dl_time_before(left->deadline, right->deadline);
return 0;
}
static inline int
rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
struct rt_mutex_waiter *right)
{
if (left->prio != right->prio)
return 0;
/*
* If both waiters have dl_prio(), we check the deadlines of the
* associated tasks.
* If left waiter has a dl_prio(), and we didn't return 0 above,
* then right waiter has a dl_prio() too.
*/
if (dl_prio(left->prio))
return left->deadline == right->deadline;
return 1;
}
static void
rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
{
@@ -322,72 +346,16 @@ rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
RB_CLEAR_NODE(&waiter->pi_tree_entry);
}
/*
* Calculate task priority from the waiter tree priority
*
* Return task->normal_prio when the waiter tree is empty or when
* the waiter is not allowed to do priority boosting
*/
int rt_mutex_getprio(struct task_struct *task)
static void rt_mutex_adjust_prio(struct task_struct *p)
{
if (likely(!task_has_pi_waiters(task)))
return task->normal_prio;
struct task_struct *pi_task = NULL;
return min(task_top_pi_waiter(task)->prio,
task->normal_prio);
}
lockdep_assert_held(&p->pi_lock);
struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
{
if (likely(!task_has_pi_waiters(task)))
return NULL;
if (task_has_pi_waiters(p))
pi_task = task_top_pi_waiter(p)->task;
return task_top_pi_waiter(task)->task;
}
/*
* Called by sched_setscheduler() to get the priority which will be
* effective after the change.
*/
int rt_mutex_get_effective_prio(struct task_struct *task, int newprio)
{
if (!task_has_pi_waiters(task))
return newprio;
if (task_top_pi_waiter(task)->task->prio <= newprio)
return task_top_pi_waiter(task)->task->prio;
return newprio;
}
/*
* Adjust the priority of a task, after its pi_waiters got modified.
*
* This can be both boosting and unboosting. task->pi_lock must be held.
*/
static void __rt_mutex_adjust_prio(struct task_struct *task)
{
int prio = rt_mutex_getprio(task);
if (task->prio != prio || dl_prio(prio))
rt_mutex_setprio(task, prio);
}
/*
* Adjust task priority (undo boosting). Called from the exit path of
* rt_mutex_slowunlock() and rt_mutex_slowlock().
*
* (Note: We do this outside of the protection of lock->wait_lock to
* allow the lock to be taken while or before we readjust the priority
* of task. We do not use the spin_xx_mutex() variants here as we are
* outside of the debug path.)
*/
void rt_mutex_adjust_prio(struct task_struct *task)
{
unsigned long flags;
raw_spin_lock_irqsave(&task->pi_lock, flags);
__rt_mutex_adjust_prio(task);
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
rt_mutex_setprio(p, pi_task);
}
/*
@@ -610,7 +578,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
* enabled we continue, but stop the requeueing in the chain
* walk.
*/
if (waiter->prio == task->prio) {
if (rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
if (!detect_deadlock)
goto out_unlock_pi;
else
@@ -706,7 +674,26 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
/* [7] Requeue the waiter in the lock waiter tree. */
rt_mutex_dequeue(lock, waiter);
/*
* Update the waiter prio fields now that we're dequeued.
*
* These values can have changed through either:
*
* sys_sched_set_scheduler() / sys_sched_setattr()
*
* or
*
* DL CBS enforcement advancing the effective deadline.
*
* Even though pi_waiters also uses these fields, and that tree is only
* updated in [11], we can do this here, since we hold [L], which
* serializes all pi_waiters access and rb_erase() does not care about
* the values of the node being removed.
*/
waiter->prio = task->prio;
waiter->deadline = task->dl.deadline;
rt_mutex_enqueue(lock, waiter);
/* [8] Release the task */
@@ -747,7 +734,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
*/
rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
rt_mutex_enqueue_pi(task, waiter);
__rt_mutex_adjust_prio(task);
rt_mutex_adjust_prio(task);
} else if (prerequeue_top_waiter == waiter) {
/*
@@ -763,7 +750,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
rt_mutex_dequeue_pi(task, waiter);
waiter = rt_mutex_top_waiter(lock);
rt_mutex_enqueue_pi(task, waiter);
__rt_mutex_adjust_prio(task);
rt_mutex_adjust_prio(task);
} else {
/*
* Nothing changed. No need to do any priority
@@ -833,6 +820,8 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
struct rt_mutex_waiter *waiter)
{
lockdep_assert_held(&lock->wait_lock);
/*
* Before testing whether we can acquire @lock, we set the
* RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
@@ -892,7 +881,8 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
* the top waiter priority (kernel view),
* @task lost.
*/
if (task->prio >= rt_mutex_top_waiter(lock)->prio)
if (!rt_mutex_waiter_less(task_to_waiter(task),
rt_mutex_top_waiter(lock)))
return 0;
/*
@@ -938,8 +928,6 @@ takeit:
*/
rt_mutex_set_owner(lock, task);
rt_mutex_deadlock_account_lock(lock, task);
return 1;
}
@@ -960,6 +948,8 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
struct rt_mutex *next_lock;
int chain_walk = 0, res;
lockdep_assert_held(&lock->wait_lock);
/*
* Early deadlock detection. We really don't want the task to
* enqueue on itself just to untangle the mess later. It's not
@@ -973,10 +963,11 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
return -EDEADLK;
raw_spin_lock(&task->pi_lock);
__rt_mutex_adjust_prio(task);
rt_mutex_adjust_prio(task);
waiter->task = task;
waiter->lock = lock;
waiter->prio = task->prio;
waiter->deadline = task->dl.deadline;
/* Get the top priority waiter on the lock */
if (rt_mutex_has_waiters(lock))
@@ -995,7 +986,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
rt_mutex_dequeue_pi(owner, top_waiter);
rt_mutex_enqueue_pi(owner, waiter);
__rt_mutex_adjust_prio(owner);
rt_mutex_adjust_prio(owner);
if (owner->pi_blocked_on)
chain_walk = 1;
} else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
@@ -1047,12 +1038,14 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
waiter = rt_mutex_top_waiter(lock);
/*
* Remove it from current->pi_waiters. We do not adjust a
* possible priority boost right now. We execute wakeup in the
* boosted mode and go back to normal after releasing
* lock->wait_lock.
* Remove it from current->pi_waiters and deboost.
*
* We must in fact deboost here in order to ensure we call
* rt_mutex_setprio() to update p->pi_top_task before the
* task unblocks.
*/
rt_mutex_dequeue_pi(current, waiter);
rt_mutex_adjust_prio(current);
/*
* As we are waking up the top waiter, and the waiter stays
@@ -1064,9 +1057,19 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
*/
lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
raw_spin_unlock(&current->pi_lock);
/*
* We deboosted before waking the top waiter task such that we don't
* run two tasks with the 'same' priority (and ensure the
* p->pi_top_task pointer points to a blocked task). This however can
* lead to priority inversion if we would get preempted after the
* deboost but before waking our donor task, hence the preempt_disable()
* before unlock.
*
* Pairs with preempt_enable() in rt_mutex_postunlock();
*/
preempt_disable();
wake_q_add(wake_q, waiter->task);
raw_spin_unlock(&current->pi_lock);
}
/*
@@ -1082,6 +1085,8 @@ static void remove_waiter(struct rt_mutex *lock,
struct task_struct *owner = rt_mutex_owner(lock);
struct rt_mutex *next_lock;
lockdep_assert_held(&lock->wait_lock);
raw_spin_lock(&current->pi_lock);
rt_mutex_dequeue(lock, waiter);
current->pi_blocked_on = NULL;
@@ -1101,7 +1106,7 @@ static void remove_waiter(struct rt_mutex *lock,
if (rt_mutex_has_waiters(lock))
rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
__rt_mutex_adjust_prio(owner);
rt_mutex_adjust_prio(owner);
/* Store the lock on which owner is blocked or NULL */
next_lock = task_blocked_on_lock(owner);
@@ -1140,8 +1145,7 @@ void rt_mutex_adjust_pi(struct task_struct *task)
raw_spin_lock_irqsave(&task->pi_lock, flags);
waiter = task->pi_blocked_on;
if (!waiter || (waiter->prio == task->prio &&
!dl_prio(task->prio))) {
if (!waiter || rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
return;
}
@@ -1155,6 +1159,14 @@ void rt_mutex_adjust_pi(struct task_struct *task)
next_lock, NULL, task);
}
void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
{
debug_rt_mutex_init_waiter(waiter);
RB_CLEAR_NODE(&waiter->pi_tree_entry);
RB_CLEAR_NODE(&waiter->tree_entry);
waiter->task = NULL;
}
/**
* __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
* @lock: the rt_mutex to take
@@ -1237,9 +1249,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
unsigned long flags;
int ret = 0;
debug_rt_mutex_init_waiter(&waiter);
RB_CLEAR_NODE(&waiter.pi_tree_entry);
RB_CLEAR_NODE(&waiter.tree_entry);
rt_mutex_init_waiter(&waiter);
/*
* Technically we could use raw_spin_[un]lock_irq() here, but this can
@@ -1330,7 +1340,8 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
/*
* Slow path to release a rt-mutex.
* Return whether the current task needs to undo a potential priority boosting.
*
* Return whether the current task needs to call rt_mutex_postunlock().
*/
static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
struct wake_q_head *wake_q)
@@ -1342,8 +1353,6 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
debug_rt_mutex_unlock(lock);
rt_mutex_deadlock_account_unlock(current);
/*
* We must be careful here if the fast path is enabled. If we
* have no waiters queued we cannot set owner to NULL here
@@ -1390,11 +1399,9 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
* Queue the next waiter for wakeup once we release the wait_lock.
*/
mark_wakeup_next_waiter(wake_q, lock);
raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
/* check PI boosting */
return true;
return true; /* call rt_mutex_postunlock() */
}
/*
@@ -1409,11 +1416,10 @@ rt_mutex_fastlock(struct rt_mutex *lock, int state,
struct hrtimer_sleeper *timeout,
enum rtmutex_chainwalk chwalk))
{
if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
rt_mutex_deadlock_account_lock(lock, current);
if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
return 0;
} else
return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK);
return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK);
}
static inline int
@@ -1425,24 +1431,33 @@ rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
enum rtmutex_chainwalk chwalk))
{
if (chwalk == RT_MUTEX_MIN_CHAINWALK &&
likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
rt_mutex_deadlock_account_lock(lock, current);
likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
return 0;
} else
return slowfn(lock, state, timeout, chwalk);
return slowfn(lock, state, timeout, chwalk);
}
static inline int
rt_mutex_fasttrylock(struct rt_mutex *lock,
int (*slowfn)(struct rt_mutex *lock))
{
if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
rt_mutex_deadlock_account_lock(lock, current);
if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
return 1;
}
return slowfn(lock);
}
/*
* Performs the wakeup of the top waiter and re-enables preemption.
*/
void rt_mutex_postunlock(struct wake_q_head *wake_q)
{
wake_up_q(wake_q);
/* Pairs with preempt_disable() in rt_mutex_slowunlock() */
preempt_enable();
}
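
Taken together with mark_wakeup_next_waiter() above, the slow unlock path now has a fixed shape. A condensed sketch as it would sit inside rtmutex.c (example_slow_unlock() is invented, and the owner/waiter checks of rt_mutex_slowunlock() are omitted):

static void example_slow_unlock(struct rt_mutex *lock)
{
	DEFINE_WAKE_Q(wake_q);
	unsigned long flags;

	raw_spin_lock_irqsave(&lock->wait_lock, flags);
	/* Dequeues the top waiter, deboosts current and preempt_disable()s. */
	mark_wakeup_next_waiter(&wake_q, lock);
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

	/* wake_up_q() plus the pairing preempt_enable(). */
	rt_mutex_postunlock(&wake_q);
}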
static inline void
rt_mutex_fastunlock(struct rt_mutex *lock,
bool (*slowfn)(struct rt_mutex *lock,
@@ -1450,18 +1465,11 @@ rt_mutex_fastunlock(struct rt_mutex *lock,
{
DEFINE_WAKE_Q(wake_q);
if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
rt_mutex_deadlock_account_unlock(current);
if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
return;
} else {
bool deboost = slowfn(lock, &wake_q);
wake_up_q(&wake_q);
/* Undo pi boosting if necessary: */
if (deboost)
rt_mutex_adjust_prio(current);
}
if (slowfn(lock, &wake_q))
rt_mutex_postunlock(&wake_q);
}
/**
@@ -1495,16 +1503,11 @@ int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)
EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
/*
* Futex variant with full deadlock detection.
* Futex variant, must not use fastpath.
*/
int rt_mutex_timed_futex_lock(struct rt_mutex *lock,
struct hrtimer_sleeper *timeout)
int __sched rt_mutex_futex_trylock(struct rt_mutex *lock)
{
might_sleep();
return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
RT_MUTEX_FULL_CHAINWALK,
rt_mutex_slowlock);
return rt_mutex_slowtrylock(lock);
}
/**
@@ -1563,20 +1566,43 @@ void __sched rt_mutex_unlock(struct rt_mutex *lock)
EXPORT_SYMBOL_GPL(rt_mutex_unlock);
/**
* rt_mutex_futex_unlock - Futex variant of rt_mutex_unlock
* @lock: the rt_mutex to be unlocked
*
* Returns: true/false indicating whether priority adjustment is
* required or not.
* Futex variant; since futex variants do not use the fast-path, it can be
* simple and will not need to retry.
*/
bool __sched rt_mutex_futex_unlock(struct rt_mutex *lock,
struct wake_q_head *wqh)
bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock,
struct wake_q_head *wake_q)
{
if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
rt_mutex_deadlock_account_unlock(current);
return false;
lockdep_assert_held(&lock->wait_lock);
debug_rt_mutex_unlock(lock);
if (!rt_mutex_has_waiters(lock)) {
lock->owner = NULL;
return false; /* done */
}
return rt_mutex_slowunlock(lock, wqh);
/*
* We've already deboosted, mark_wakeup_next_waiter() will
* retain preempt_disabled when we drop the wait_lock, to
* avoid inversion prior to the wakeup. preempt_disable()
* therein pairs with rt_mutex_postunlock().
*/
mark_wakeup_next_waiter(wake_q, lock);
return true; /* call postunlock() */
}
void __sched rt_mutex_futex_unlock(struct rt_mutex *lock)
{
DEFINE_WAKE_Q(wake_q);
bool postunlock;
raw_spin_lock_irq(&lock->wait_lock);
postunlock = __rt_mutex_futex_unlock(lock, &wake_q);
raw_spin_unlock_irq(&lock->wait_lock);
if (postunlock)
rt_mutex_postunlock(&wake_q);
}
/**
@@ -1637,7 +1663,6 @@ void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
__rt_mutex_init(lock, NULL);
debug_rt_mutex_proxy_lock(lock, proxy_owner);
rt_mutex_set_owner(lock, proxy_owner);
rt_mutex_deadlock_account_lock(lock, proxy_owner);
}
/**
@@ -1657,7 +1682,37 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock,
{
debug_rt_mutex_proxy_unlock(lock);
rt_mutex_set_owner(lock, NULL);
rt_mutex_deadlock_account_unlock(proxy_owner);
}
int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
struct rt_mutex_waiter *waiter,
struct task_struct *task)
{
int ret;
if (try_to_take_rt_mutex(lock, task, NULL))
return 1;
/* We enforce deadlock detection for futexes */
ret = task_blocks_on_rt_mutex(lock, waiter, task,
RT_MUTEX_FULL_CHAINWALK);
if (ret && !rt_mutex_owner(lock)) {
/*
* Reset the return value. We might have
* returned with -EDEADLK and the owner
* released the lock while we were walking the
* pi chain. Let the waiter sort it out.
*/
ret = 0;
}
if (unlikely(ret))
remove_waiter(lock, waiter);
debug_rt_mutex_print_deadlock(waiter);
return ret;
}
/**
@@ -1680,33 +1735,9 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
int ret;
raw_spin_lock_irq(&lock->wait_lock);
if (try_to_take_rt_mutex(lock, task, NULL)) {
raw_spin_unlock_irq(&lock->wait_lock);
return 1;
}
/* We enforce deadlock detection for futexes */
ret = task_blocks_on_rt_mutex(lock, waiter, task,
RT_MUTEX_FULL_CHAINWALK);
if (ret && !rt_mutex_owner(lock)) {
/*
* Reset the return value. We might have
* returned with -EDEADLK and the owner
* released the lock while we were walking the
* pi chain. Let the waiter sort it out.
*/
ret = 0;
}
if (unlikely(ret))
remove_waiter(lock, waiter);
ret = __rt_mutex_start_proxy_lock(lock, waiter, task);
raw_spin_unlock_irq(&lock->wait_lock);
debug_rt_mutex_print_deadlock(waiter);
return ret;
}
@@ -1731,21 +1762,23 @@ struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock)
}
/**
* rt_mutex_finish_proxy_lock() - Complete lock acquisition
* rt_mutex_wait_proxy_lock() - Wait for lock acquisition
* @lock: the rt_mutex we were woken on
* @to: the timeout, null if none. hrtimer should already have
* been started.
* @waiter: the pre-initialized rt_mutex_waiter
*
* Complete the lock acquisition started on our behalf by another thread.
* Wait for the lock acquisition started on our behalf by
* rt_mutex_start_proxy_lock(). Upon failure, the caller must call
* rt_mutex_cleanup_proxy_lock().
*
* Returns:
* 0 - success
* <0 - error, one of -EINTR, -ETIMEDOUT
*
* Special API call for PI-futex requeue support
* Special API call for PI-futex support
*/
int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
int rt_mutex_wait_proxy_lock(struct rt_mutex *lock,
struct hrtimer_sleeper *to,
struct rt_mutex_waiter *waiter)
{
@@ -1758,8 +1791,45 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
/* sleep on the mutex */
ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
if (unlikely(ret))
raw_spin_unlock_irq(&lock->wait_lock);
return ret;
}
/**
* rt_mutex_cleanup_proxy_lock() - Cleanup failed lock acquisition
* @lock: the rt_mutex we were woken on
* @waiter: the pre-initialized rt_mutex_waiter
*
* Attempt to clean up after a failed rt_mutex_wait_proxy_lock().
*
* Unless we acquired the lock, we're still enqueued on the wait-list and can
* in fact still be granted ownership until we're removed. Therefore we can
* find we are in fact the owner and must disregard the
* rt_mutex_wait_proxy_lock() failure.
*
* Returns:
* true - we did the cleanup and are done.
* false - we acquired the lock after rt_mutex_wait_proxy_lock() returned,
* the caller should disregard its return value.
*
* Special API call for PI-futex support
*/
bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock,
struct rt_mutex_waiter *waiter)
{
bool cleanup = false;
raw_spin_lock_irq(&lock->wait_lock);
/*
* Unless we're the owner, we're still enqueued on the wait_list.
* So check if we became owner, if not, take us off the wait_list.
*/
if (rt_mutex_owner(lock) != current) {
remove_waiter(lock, waiter);
fixup_rt_mutex_waiters(lock);
cleanup = true;
}
/*
* try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
@@ -1769,5 +1839,5 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
raw_spin_unlock_irq(&lock->wait_lock);
return ret;
return cleanup;
}
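
On the futex side, the wait/cleanup split documented above gives callers a sequence along these lines (a sketch of the calling convention; example_wait_for_requeued_lock() is invented and is not the actual futex.c code):

static int example_wait_for_requeued_lock(struct rt_mutex *pi_mutex,
					  struct hrtimer_sleeper *to,
					  struct rt_mutex_waiter *waiter)
{
	int ret;

	/* Sleep until woken, interrupted or timed out. */
	ret = rt_mutex_wait_proxy_lock(pi_mutex, to, waiter);
	if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, waiter)) {
		/*
		 * Ownership was granted after the wait had already failed;
		 * per the comment above, the failure must be disregarded.
		 */
		ret = 0;
	}
	return ret;
}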

View File

@@ -11,8 +11,6 @@
*/
#define rt_mutex_deadlock_check(l) (0)
#define rt_mutex_deadlock_account_lock(m, t) do { } while (0)
#define rt_mutex_deadlock_account_unlock(l) do { } while (0)
#define debug_rt_mutex_init_waiter(w) do { } while (0)
#define debug_rt_mutex_free_waiter(w) do { } while (0)
#define debug_rt_mutex_lock(l) do { } while (0)

View File

@@ -34,6 +34,7 @@ struct rt_mutex_waiter {
struct rt_mutex *deadlock_lock;
#endif
int prio;
u64 deadline;
};
/*
@@ -103,16 +104,26 @@ extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
struct task_struct *proxy_owner);
extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,
struct task_struct *proxy_owner);
extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
struct rt_mutex_waiter *waiter,
struct task_struct *task);
extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
struct rt_mutex_waiter *waiter,
struct task_struct *task);
extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
struct hrtimer_sleeper *to,
struct rt_mutex_waiter *waiter);
extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to);
extern bool rt_mutex_futex_unlock(struct rt_mutex *lock,
struct wake_q_head *wqh);
extern void rt_mutex_adjust_prio(struct task_struct *task);
extern int rt_mutex_wait_proxy_lock(struct rt_mutex *lock,
struct hrtimer_sleeper *to,
struct rt_mutex_waiter *waiter);
extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock,
struct rt_mutex_waiter *waiter);
extern int rt_mutex_futex_trylock(struct rt_mutex *l);
extern void rt_mutex_futex_unlock(struct rt_mutex *lock);
extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock,
struct wake_q_head *wqh);
extern void rt_mutex_postunlock(struct wake_q_head *wake_q);
#ifdef CONFIG_DEBUG_RT_MUTEXES
# include "rtmutex-debug.h"

View File

@@ -124,10 +124,8 @@ EXPORT_SYMBOL(up_write);
*/
void downgrade_write(struct rw_semaphore *sem)
{
/*
* lockdep: a downgraded write will live on as a write
* dependency.
*/
lock_downgrade(&sem->dep_map, _RET_IP_);
rwsem_set_reader_owned(sem);
__downgrade_write(sem);
}
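
The lock_downgrade() annotation above is what lets lockdep validate this write-to-read transition; callers keep using the existing rwsem API unchanged. A usage sketch (my_sem and my_update_then_read() are invented for illustration):

#include <linux/rwsem.h>

static DECLARE_RWSEM(my_sem);

static void my_update_then_read(void)
{
	down_write(&my_sem);
	/* ... modify the protected data exclusively ... */

	/* Atomically convert the write hold into a read hold. */
	downgrade_write(&my_sem);

	/* ... read-only work; other readers may now run concurrently ... */
	up_read(&my_sem);
}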

View File

@@ -353,8 +353,8 @@ static int test_cycle(unsigned int ncpus)
struct stress {
struct work_struct work;
struct ww_mutex *locks;
unsigned long timeout;
int nlocks;
int nloops;
};
static int *get_random_order(int count)
@@ -398,12 +398,11 @@ static void stress_inorder_work(struct work_struct *work)
if (!order)
return;
ww_acquire_init(&ctx, &ww_class);
do {
int contended = -1;
int n, err;
ww_acquire_init(&ctx, &ww_class);
retry:
err = 0;
for (n = 0; n < nlocks; n++) {
@@ -433,9 +432,9 @@ retry:
__func__, err);
break;
}
} while (--stress->nloops);
ww_acquire_fini(&ctx);
ww_acquire_fini(&ctx);
} while (!time_after(jiffies, stress->timeout));
kfree(order);
kfree(stress);
@@ -470,9 +469,9 @@ static void stress_reorder_work(struct work_struct *work)
kfree(order);
order = NULL;
ww_acquire_init(&ctx, &ww_class);
do {
ww_acquire_init(&ctx, &ww_class);
list_for_each_entry(ll, &locks, link) {
err = ww_mutex_lock(ll->lock, &ctx);
if (!err)
@@ -495,9 +494,9 @@ static void stress_reorder_work(struct work_struct *work)
dummy_load(stress);
list_for_each_entry(ll, &locks, link)
ww_mutex_unlock(ll->lock);
} while (--stress->nloops);
ww_acquire_fini(&ctx);
ww_acquire_fini(&ctx);
} while (!time_after(jiffies, stress->timeout));
out:
list_for_each_entry_safe(ll, ln, &locks, link)
@@ -523,7 +522,7 @@ static void stress_one_work(struct work_struct *work)
__func__, err);
break;
}
} while (--stress->nloops);
} while (!time_after(jiffies, stress->timeout));
kfree(stress);
}
@@ -533,7 +532,7 @@ static void stress_one_work(struct work_struct *work)
#define STRESS_ONE BIT(2)
#define STRESS_ALL (STRESS_INORDER | STRESS_REORDER | STRESS_ONE)
static int stress(int nlocks, int nthreads, int nloops, unsigned int flags)
static int stress(int nlocks, int nthreads, unsigned int flags)
{
struct ww_mutex *locks;
int n;
@@ -575,7 +574,7 @@ static int stress(int nlocks, int nthreads, int nloops, unsigned int flags)
INIT_WORK(&stress->work, fn);
stress->locks = locks;
stress->nlocks = nlocks;
stress->nloops = nloops;
stress->timeout = jiffies + 2*HZ;
queue_work(wq, &stress->work);
nthreads--;
@@ -619,15 +618,15 @@ static int __init test_ww_mutex_init(void)
if (ret)
return ret;
ret = stress(16, 2*ncpus, 1<<10, STRESS_INORDER);
ret = stress(16, 2*ncpus, STRESS_INORDER);
if (ret)
return ret;
ret = stress(16, 2*ncpus, 1<<10, STRESS_REORDER);
ret = stress(16, 2*ncpus, STRESS_REORDER);
if (ret)
return ret;
ret = stress(4095, hweight32(STRESS_ALL)*ncpus, 1<<12, STRESS_ALL);
ret = stress(4095, hweight32(STRESS_ALL)*ncpus, STRESS_ALL);
if (ret)
return ret;