Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking updates from Ingo Molnar:
 "The main changes in this cycle were:

   - A comprehensive rewrite of the robust/PI futex code's exit handling
     to fix various exit races. (Thomas Gleixner et al)

   - Rework the generic REFCOUNT_FULL implementation using atomic_fetch_*
     operations so that the performance impact of the cmpxchg() loops is
     mitigated for common refcount operations.

     With these performance improvements the generic implementation of
     refcount_t should be good enough for everybody - and this got
     confirmed by performance testing, so remove ARCH_HAS_REFCOUNT and
     REFCOUNT_FULL entirely, leaving the generic implementation enabled
     unconditionally. (Will Deacon)

   - Other misc changes, fixes, cleanups"

* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (27 commits)
  lkdtm: Remove references to CONFIG_REFCOUNT_FULL
  locking/refcount: Remove unused 'refcount_error_report()' function
  locking/refcount: Consolidate implementations of refcount_t
  locking/refcount: Consolidate REFCOUNT_{MAX,SATURATED} definitions
  locking/refcount: Move saturation warnings out of line
  locking/refcount: Improve performance of generic REFCOUNT_FULL code
  locking/refcount: Move the bulk of the REFCOUNT_FULL implementation into the <linux/refcount.h> header
  locking/refcount: Remove unused refcount_*_checked() variants
  locking/refcount: Ensure integer operands are treated as signed
  locking/refcount: Define constants for saturation and max refcount values
  futex: Prevent exit livelock
  futex: Provide distinct return value when owner is exiting
  futex: Add mutex around futex exit
  futex: Provide state handling for exec() as well
  futex: Sanitize exit state handling
  futex: Mark the begin of futex exit explicitly
  futex: Set task::futex_state to DEAD right after handling futex exit
  futex: Split futex_mm_release() for exit/exec
  exit/exec: Seperate mm_release()
  futex: Replace PF_EXITPIDONE with a state
  ...
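As a rough illustration of the refcount direction described above, the sketch below shows, in plain userspace C11 rather than the kernel's actual refcount_t code (the my_refcount_* names and the saturation constant are invented for the example), how one unconditional atomic fetch-add plus signed-value checks can replace a cmpxchg() retry loop while still saturating instead of wrapping on overflow:

#include <limits.h>
#include <stdatomic.h>
#include <stdio.h>

/* Illustrative saturation value, pinned deep in the negative range. */
#define MY_REFCOUNT_SATURATED	(INT_MIN / 2)

typedef struct {
	atomic_int refs;
} my_refcount_t;

static void my_refcount_inc(my_refcount_t *r)
{
	/* Fast path: one unconditional fetch-add, no cmpxchg() retry loop. */
	int old = atomic_fetch_add_explicit(&r->refs, 1, memory_order_relaxed);

	/*
	 * Treat the counter as signed: if it was already negative
	 * (saturated) or this add just wrapped past INT_MAX, pin it to
	 * the saturation value so it can never wrap back into the range
	 * of plausible reference counts.
	 */
	if (old < 0 || old == INT_MAX)
		atomic_store_explicit(&r->refs, MY_REFCOUNT_SATURATED,
				      memory_order_relaxed);
}

int main(void)
{
	my_refcount_t r = { 1 };

	my_refcount_inc(&r);
	printf("refs after inc: %d\n", atomic_load(&r.refs));	/* 2 */

	atomic_store(&r.refs, INT_MAX);
	my_refcount_inc(&r);		/* would overflow; saturates instead */
	printf("refs after overflow: %d\n", atomic_load(&r.refs));
	return 0;
}

The real refcount_t implementation also covers the add/dec paths and moves the saturation warnings out of line (see the shortlog above); this sketch only shows the shape of the fast path that avoids the cmpxchg() loop.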
@@ -339,7 +339,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
* up_read_non_owner(). The rwsem_release() is called
* here to release the lock from lockdep's perspective.
*/
rwsem_release(&current->mm->mmap_sem.dep_map, 1, _RET_IP_);
rwsem_release(&current->mm->mmap_sem.dep_map, _RET_IP_);
}
}
@@ -336,7 +336,7 @@ static void lockdep_acquire_cpus_lock(void)
static void lockdep_release_cpus_lock(void)
{
rwsem_release(&cpu_hotplug_lock.rw_sem.dep_map, 1, _THIS_IP_);
rwsem_release(&cpu_hotplug_lock.rw_sem.dep_map, _THIS_IP_);
}
/*
@@ -437,7 +437,7 @@ static void exit_mm(void)
struct mm_struct *mm = current->mm;
struct core_state *core_state;
mm_release(current, mm);
exit_mm_release(current, mm);
if (!mm)
return;
sync_mm_rss(mm);
@@ -746,32 +746,12 @@ void __noreturn do_exit(long code)
*/
if (unlikely(tsk->flags & PF_EXITING)) {
pr_alert("Fixing recursive fault but reboot is needed!\n");
/*
* We can do this unlocked here. The futex code uses
* this flag just to verify whether the pi state
* cleanup has been done or not. In the worst case it
* loops once more. We pretend that the cleanup was
* done as there is no way to return. Either the
* OWNER_DIED bit is set by now or we push the blocked
* task into the wait for ever nirwana as well.
*/
tsk->flags |= PF_EXITPIDONE;
futex_exit_recursive(tsk);
set_current_state(TASK_UNINTERRUPTIBLE);
schedule();
}
exit_signals(tsk); /* sets PF_EXITING */
/*
* Ensure that all new tsk->pi_lock acquisitions must observe
* PF_EXITING. Serializes against futex.c:attach_to_pi_owner().
*/
smp_mb();
/*
* Ensure that we must observe the pi_state in exit_mm() ->
* mm_release() -> exit_pi_state_list().
*/
raw_spin_lock_irq(&tsk->pi_lock);
raw_spin_unlock_irq(&tsk->pi_lock);
if (unlikely(in_atomic())) {
pr_info("note: %s[%d] exited with preempt_count %d\n",
@@ -846,12 +826,6 @@ void __noreturn do_exit(long code)
|
||||
* Make sure we are holding no locks:
|
||||
*/
|
||||
debug_check_no_locks_held();
|
||||
/*
|
||||
* We can do this unlocked here. The futex code uses this flag
|
||||
* just to verify whether the pi state cleanup has been done
|
||||
* or not. In the worst case it loops once more.
|
||||
*/
|
||||
tsk->flags |= PF_EXITPIDONE;
|
||||
|
||||
if (tsk->io_context)
|
||||
exit_io_context(tsk);
|
||||
|
@@ -1283,24 +1283,8 @@ static int wait_for_vfork_done(struct task_struct *child,
|
||||
* restoring the old one. . .
|
||||
* Eric Biederman 10 January 1998
|
||||
*/
|
||||
void mm_release(struct task_struct *tsk, struct mm_struct *mm)
|
||||
static void mm_release(struct task_struct *tsk, struct mm_struct *mm)
|
||||
{
|
||||
/* Get rid of any futexes when releasing the mm */
|
||||
#ifdef CONFIG_FUTEX
|
||||
if (unlikely(tsk->robust_list)) {
|
||||
exit_robust_list(tsk);
|
||||
tsk->robust_list = NULL;
|
||||
}
|
||||
#ifdef CONFIG_COMPAT
|
||||
if (unlikely(tsk->compat_robust_list)) {
|
||||
compat_exit_robust_list(tsk);
|
||||
tsk->compat_robust_list = NULL;
|
||||
}
|
||||
#endif
|
||||
if (unlikely(!list_empty(&tsk->pi_state_list)))
|
||||
exit_pi_state_list(tsk);
|
||||
#endif
|
||||
|
||||
uprobe_free_utask(tsk);
|
||||
|
||||
/* Get rid of any cached register state */
|
||||
@@ -1333,6 +1317,18 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
|
||||
complete_vfork_done(tsk);
|
||||
}
|
||||
|
||||
void exit_mm_release(struct task_struct *tsk, struct mm_struct *mm)
|
||||
{
|
||||
futex_exit_release(tsk);
|
||||
mm_release(tsk, mm);
|
||||
}
|
||||
|
||||
void exec_mm_release(struct task_struct *tsk, struct mm_struct *mm)
|
||||
{
|
||||
futex_exec_release(tsk);
|
||||
mm_release(tsk, mm);
|
||||
}
|
||||
|
||||
/**
|
||||
* dup_mm() - duplicates an existing mm structure
|
||||
* @tsk: the task_struct with which the new mm will be associated.
|
||||
@@ -2124,14 +2120,8 @@ static __latent_entropy struct task_struct *copy_process(
|
||||
#ifdef CONFIG_BLOCK
|
||||
p->plug = NULL;
|
||||
#endif
|
||||
#ifdef CONFIG_FUTEX
|
||||
p->robust_list = NULL;
|
||||
#ifdef CONFIG_COMPAT
|
||||
p->compat_robust_list = NULL;
|
||||
#endif
|
||||
INIT_LIST_HEAD(&p->pi_state_list);
|
||||
p->pi_state_cache = NULL;
|
||||
#endif
|
||||
futex_init_task(p);
|
||||
|
||||
/*
|
||||
* sigaltstack should be cleared when sharing the same VM
|
||||
*/
|
||||
|
kernel/futex.c
@@ -325,6 +325,12 @@ static inline bool should_fail_futex(bool fshared)
|
||||
}
|
||||
#endif /* CONFIG_FAIL_FUTEX */
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
static void compat_exit_robust_list(struct task_struct *curr);
|
||||
#else
|
||||
static inline void compat_exit_robust_list(struct task_struct *curr) { }
|
||||
#endif
|
||||
|
||||
static inline void futex_get_mm(union futex_key *key)
|
||||
{
|
||||
mmgrab(key->private.mm);
|
||||
@@ -890,7 +896,7 @@ static void put_pi_state(struct futex_pi_state *pi_state)
|
||||
* Kernel cleans up PI-state, but userspace is likely hosed.
|
||||
* (Robust-futex cleanup is separate and might save the day for userspace.)
|
||||
*/
|
||||
void exit_pi_state_list(struct task_struct *curr)
|
||||
static void exit_pi_state_list(struct task_struct *curr)
|
||||
{
|
||||
struct list_head *next, *head = &curr->pi_state_list;
|
||||
struct futex_pi_state *pi_state;
|
||||
@@ -960,7 +966,8 @@ void exit_pi_state_list(struct task_struct *curr)
|
||||
}
|
||||
raw_spin_unlock_irq(&curr->pi_lock);
|
||||
}
|
||||
|
||||
#else
|
||||
static inline void exit_pi_state_list(struct task_struct *curr) { }
|
||||
#endif
|
||||
|
||||
/*
|
||||
@@ -1169,16 +1176,47 @@ out_error:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* wait_for_owner_exiting - Block until the owner has exited
|
||||
* @exiting: Pointer to the exiting task
|
||||
*
|
||||
* Caller must hold a refcount on @exiting.
|
||||
*/
|
||||
static void wait_for_owner_exiting(int ret, struct task_struct *exiting)
|
||||
{
|
||||
if (ret != -EBUSY) {
|
||||
WARN_ON_ONCE(exiting);
|
||||
return;
|
||||
}
|
||||
|
||||
if (WARN_ON_ONCE(ret == -EBUSY && !exiting))
|
||||
return;
|
||||
|
||||
mutex_lock(&exiting->futex_exit_mutex);
|
||||
/*
|
||||
* No point in doing state checking here. If the waiter got here
|
||||
* while the task was in exec()->exec_futex_release() then it can
|
||||
* have any FUTEX_STATE_* value when the waiter has acquired the
|
||||
* mutex. OK, if running, EXITING or DEAD if it reached exit()
|
||||
* already. Highly unlikely and not a problem. Just one more round
|
||||
* through the futex maze.
|
||||
*/
|
||||
mutex_unlock(&exiting->futex_exit_mutex);
|
||||
|
||||
put_task_struct(exiting);
|
||||
}
|
||||
|
||||
static int handle_exit_race(u32 __user *uaddr, u32 uval,
|
||||
struct task_struct *tsk)
|
||||
{
|
||||
u32 uval2;
|
||||
|
||||
/*
|
||||
* If PF_EXITPIDONE is not yet set, then try again.
|
||||
* If the futex exit state is not yet FUTEX_STATE_DEAD, tell the
|
||||
* caller that the alleged owner is busy.
|
||||
*/
|
||||
if (tsk && !(tsk->flags & PF_EXITPIDONE))
|
||||
return -EAGAIN;
|
||||
if (tsk && tsk->futex_state != FUTEX_STATE_DEAD)
|
||||
return -EBUSY;
|
||||
|
||||
/*
|
||||
* Reread the user space value to handle the following situation:
|
||||
@@ -1196,8 +1234,9 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval,
|
||||
* *uaddr = 0xC0000000; tsk = get_task(PID);
|
||||
* } if (!tsk->flags & PF_EXITING) {
|
||||
* ... attach();
|
||||
* tsk->flags |= PF_EXITPIDONE; } else {
|
||||
* if (!(tsk->flags & PF_EXITPIDONE))
|
||||
* tsk->futex_state = } else {
|
||||
* FUTEX_STATE_DEAD; if (tsk->futex_state !=
|
||||
* FUTEX_STATE_DEAD)
|
||||
* return -EAGAIN;
|
||||
* return -ESRCH; <--- FAIL
|
||||
* }
|
||||
@@ -1228,7 +1267,8 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval,
|
||||
* it after doing proper sanity checks.
|
||||
*/
|
||||
static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
|
||||
struct futex_pi_state **ps)
|
||||
struct futex_pi_state **ps,
|
||||
struct task_struct **exiting)
|
||||
{
|
||||
pid_t pid = uval & FUTEX_TID_MASK;
|
||||
struct futex_pi_state *pi_state;
|
||||
@@ -1253,22 +1293,33 @@ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
|
||||
}
|
||||
|
||||
/*
|
||||
* We need to look at the task state flags to figure out,
|
||||
* whether the task is exiting. To protect against the do_exit
|
||||
* change of the task flags, we do this protected by
|
||||
* p->pi_lock:
|
||||
* We need to look at the task state to figure out, whether the
|
||||
* task is exiting. To protect against the change of the task state
|
||||
* in futex_exit_release(), we do this protected by p->pi_lock:
|
||||
*/
|
||||
raw_spin_lock_irq(&p->pi_lock);
|
||||
if (unlikely(p->flags & PF_EXITING)) {
|
||||
if (unlikely(p->futex_state != FUTEX_STATE_OK)) {
|
||||
/*
|
||||
* The task is on the way out. When PF_EXITPIDONE is
|
||||
* set, we know that the task has finished the
|
||||
* cleanup:
|
||||
* The task is on the way out. When the futex state is
|
||||
* FUTEX_STATE_DEAD, we know that the task has finished
|
||||
* the cleanup:
|
||||
*/
|
||||
int ret = handle_exit_race(uaddr, uval, p);
|
||||
|
||||
raw_spin_unlock_irq(&p->pi_lock);
|
||||
put_task_struct(p);
|
||||
/*
|
||||
* If the owner task is between FUTEX_STATE_EXITING and
|
||||
* FUTEX_STATE_DEAD then store the task pointer and keep
|
||||
* the reference on the task struct. The calling code will
|
||||
* drop all locks, wait for the task to reach
|
||||
* FUTEX_STATE_DEAD and then drop the refcount. This is
|
||||
* required to prevent a live lock when the current task
|
||||
* preempted the exiting task between the two states.
|
||||
*/
|
||||
if (ret == -EBUSY)
|
||||
*exiting = p;
|
||||
else
|
||||
put_task_struct(p);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -1307,7 +1358,8 @@ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
|
||||
|
||||
static int lookup_pi_state(u32 __user *uaddr, u32 uval,
|
||||
struct futex_hash_bucket *hb,
|
||||
union futex_key *key, struct futex_pi_state **ps)
|
||||
union futex_key *key, struct futex_pi_state **ps,
|
||||
struct task_struct **exiting)
|
||||
{
|
||||
struct futex_q *top_waiter = futex_top_waiter(hb, key);
|
||||
|
||||
@@ -1322,7 +1374,7 @@ static int lookup_pi_state(u32 __user *uaddr, u32 uval,
|
||||
* We are the first waiter - try to look up the owner based on
|
||||
* @uval and attach to it.
|
||||
*/
|
||||
return attach_to_pi_owner(uaddr, uval, key, ps);
|
||||
return attach_to_pi_owner(uaddr, uval, key, ps, exiting);
|
||||
}
|
||||
|
||||
static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
|
||||
@@ -1350,6 +1402,8 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
|
||||
* lookup
|
||||
* @task: the task to perform the atomic lock work for. This will
|
||||
* be "current" except in the case of requeue pi.
|
||||
* @exiting: Pointer to store the task pointer of the owner task
|
||||
* which is in the middle of exiting
|
||||
* @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0)
|
||||
*
|
||||
* Return:
|
||||
@@ -1358,11 +1412,17 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
|
||||
* - <0 - error
|
||||
*
|
||||
* The hb->lock and futex_key refs shall be held by the caller.
|
||||
*
|
||||
* @exiting is only set when the return value is -EBUSY. If so, this holds
|
||||
* a refcount on the exiting task on return and the caller needs to drop it
|
||||
* after waiting for the exit to complete.
|
||||
*/
|
||||
static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
|
||||
union futex_key *key,
|
||||
struct futex_pi_state **ps,
|
||||
struct task_struct *task, int set_waiters)
|
||||
struct task_struct *task,
|
||||
struct task_struct **exiting,
|
||||
int set_waiters)
|
||||
{
|
||||
u32 uval, newval, vpid = task_pid_vnr(task);
|
||||
struct futex_q *top_waiter;
|
||||
@@ -1432,7 +1492,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
|
||||
* attach to the owner. If that fails, no harm done, we only
|
||||
* set the FUTEX_WAITERS bit in the user space variable.
|
||||
*/
|
||||
return attach_to_pi_owner(uaddr, newval, key, ps);
|
||||
return attach_to_pi_owner(uaddr, newval, key, ps, exiting);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1480,7 +1540,7 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
|
||||
|
||||
/*
|
||||
* Queue the task for later wakeup for after we've released
|
||||
* the hb->lock. wake_q_add() grabs reference to p.
|
||||
* the hb->lock.
|
||||
*/
|
||||
wake_q_add_safe(wake_q, p);
|
||||
}
|
||||
@@ -1850,6 +1910,8 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
|
||||
* @key1: the from futex key
|
||||
* @key2: the to futex key
|
||||
* @ps: address to store the pi_state pointer
|
||||
* @exiting: Pointer to store the task pointer of the owner task
|
||||
* which is in the middle of exiting
|
||||
* @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0)
|
||||
*
|
||||
* Try and get the lock on behalf of the top waiter if we can do it atomically.
|
||||
@@ -1857,16 +1919,20 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
|
||||
* then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
|
||||
* hb1 and hb2 must be held by the caller.
|
||||
*
|
||||
* @exiting is only set when the return value is -EBUSY. If so, this holds
|
||||
* a refcount on the exiting task on return and the caller needs to drop it
|
||||
* after waiting for the exit to complete.
|
||||
*
|
||||
* Return:
|
||||
* - 0 - failed to acquire the lock atomically;
|
||||
* - >0 - acquired the lock, return value is vpid of the top_waiter
|
||||
* - <0 - error
|
||||
*/
|
||||
static int futex_proxy_trylock_atomic(u32 __user *pifutex,
|
||||
struct futex_hash_bucket *hb1,
|
||||
struct futex_hash_bucket *hb2,
|
||||
union futex_key *key1, union futex_key *key2,
|
||||
struct futex_pi_state **ps, int set_waiters)
|
||||
static int
|
||||
futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
|
||||
struct futex_hash_bucket *hb2, union futex_key *key1,
|
||||
union futex_key *key2, struct futex_pi_state **ps,
|
||||
struct task_struct **exiting, int set_waiters)
|
||||
{
|
||||
struct futex_q *top_waiter = NULL;
|
||||
u32 curval;
|
||||
@@ -1903,7 +1969,7 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
|
||||
*/
|
||||
vpid = task_pid_vnr(top_waiter->task);
|
||||
ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
|
||||
set_waiters);
|
||||
exiting, set_waiters);
|
||||
if (ret == 1) {
|
||||
requeue_pi_wake_futex(top_waiter, key2, hb2);
|
||||
return vpid;
|
||||
@@ -2032,6 +2098,8 @@ retry_private:
|
||||
}
|
||||
|
||||
if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
|
||||
struct task_struct *exiting = NULL;
|
||||
|
||||
/*
|
||||
* Attempt to acquire uaddr2 and wake the top waiter. If we
|
||||
* intend to requeue waiters, force setting the FUTEX_WAITERS
|
||||
@@ -2039,7 +2107,8 @@ retry_private:
|
||||
* faults rather in the requeue loop below.
|
||||
*/
|
||||
ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
|
||||
&key2, &pi_state, nr_requeue);
|
||||
&key2, &pi_state,
|
||||
&exiting, nr_requeue);
|
||||
|
||||
/*
|
||||
* At this point the top_waiter has either taken uaddr2 or is
|
||||
@@ -2066,7 +2135,8 @@ retry_private:
|
||||
* If that call succeeds then we have pi_state and an
|
||||
* initial refcount on it.
|
||||
*/
|
||||
ret = lookup_pi_state(uaddr2, ret, hb2, &key2, &pi_state);
|
||||
ret = lookup_pi_state(uaddr2, ret, hb2, &key2,
|
||||
&pi_state, &exiting);
|
||||
}
|
||||
|
||||
switch (ret) {
|
||||
@@ -2084,17 +2154,24 @@ retry_private:
|
||||
if (!ret)
|
||||
goto retry;
|
||||
goto out;
|
||||
case -EBUSY:
|
||||
case -EAGAIN:
|
||||
/*
|
||||
* Two reasons for this:
|
||||
* - Owner is exiting and we just wait for the
|
||||
* - EBUSY: Owner is exiting and we just wait for the
|
||||
* exit to complete.
|
||||
* - The user space value changed.
|
||||
* - EAGAIN: The user space value changed.
|
||||
*/
|
||||
double_unlock_hb(hb1, hb2);
|
||||
hb_waiters_dec(hb2);
|
||||
put_futex_key(&key2);
|
||||
put_futex_key(&key1);
|
||||
/*
|
||||
* Handle the case where the owner is in the middle of
|
||||
* exiting. Wait for the exit to complete otherwise
|
||||
* this task might loop forever, aka. live lock.
|
||||
*/
|
||||
wait_for_owner_exiting(ret, exiting);
|
||||
cond_resched();
|
||||
goto retry;
|
||||
default:
|
||||
@@ -2801,6 +2878,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
|
||||
{
|
||||
struct hrtimer_sleeper timeout, *to;
|
||||
struct futex_pi_state *pi_state = NULL;
|
||||
struct task_struct *exiting = NULL;
|
||||
struct rt_mutex_waiter rt_waiter;
|
||||
struct futex_hash_bucket *hb;
|
||||
struct futex_q q = futex_q_init;
|
||||
@@ -2822,7 +2900,8 @@ retry:
|
||||
retry_private:
|
||||
hb = queue_lock(&q);
|
||||
|
||||
ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
|
||||
ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current,
|
||||
&exiting, 0);
|
||||
if (unlikely(ret)) {
|
||||
/*
|
||||
* Atomic work succeeded and we got the lock,
|
||||
@@ -2835,15 +2914,22 @@ retry_private:
|
||||
goto out_unlock_put_key;
|
||||
case -EFAULT:
|
||||
goto uaddr_faulted;
|
||||
case -EBUSY:
|
||||
case -EAGAIN:
|
||||
/*
|
||||
* Two reasons for this:
|
||||
* - Task is exiting and we just wait for the
|
||||
* - EBUSY: Task is exiting and we just wait for the
|
||||
* exit to complete.
|
||||
* - The user space value changed.
|
||||
* - EAGAIN: The user space value changed.
|
||||
*/
|
||||
queue_unlock(hb);
|
||||
put_futex_key(&q.key);
|
||||
/*
|
||||
* Handle the case where the owner is in the middle of
|
||||
* exiting. Wait for the exit to complete otherwise
|
||||
* this task might loop forever, aka. live lock.
|
||||
*/
|
||||
wait_for_owner_exiting(ret, exiting);
|
||||
cond_resched();
|
||||
goto retry;
|
||||
default:
|
||||
@@ -3452,11 +3538,16 @@ err_unlock:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Constants for the pending_op argument of handle_futex_death */
|
||||
#define HANDLE_DEATH_PENDING true
|
||||
#define HANDLE_DEATH_LIST false
|
||||
|
||||
/*
|
||||
* Process a futex-list entry, check whether it's owned by the
|
||||
* dying task, and do notification if so:
|
||||
*/
|
||||
static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
|
||||
static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr,
|
||||
bool pi, bool pending_op)
|
||||
{
|
||||
u32 uval, uninitialized_var(nval), mval;
|
||||
int err;
|
||||
@@ -3469,6 +3560,42 @@ retry:
|
||||
if (get_user(uval, uaddr))
|
||||
return -1;
|
||||
|
||||
/*
|
||||
* Special case for regular (non PI) futexes. The unlock path in
|
||||
* user space has two race scenarios:
|
||||
*
|
||||
* 1. The unlock path releases the user space futex value and
|
||||
* before it can execute the futex() syscall to wake up
|
||||
* waiters it is killed.
|
||||
*
|
||||
* 2. A woken up waiter is killed before it can acquire the
|
||||
* futex in user space.
|
||||
*
|
||||
* In both cases the TID validation below prevents a wakeup of
|
||||
* potential waiters which can cause these waiters to block
|
||||
* forever.
|
||||
*
|
||||
* In both cases the following conditions are met:
|
||||
*
|
||||
* 1) task->robust_list->list_op_pending != NULL
|
||||
* @pending_op == true
|
||||
* 2) User space futex value == 0
|
||||
* 3) Regular futex: @pi == false
|
||||
*
|
||||
* If these conditions are met, it is safe to attempt waking up a
|
||||
* potential waiter without touching the user space futex value and
|
||||
* trying to set the OWNER_DIED bit. The user space futex value is
|
||||
* uncontended and the rest of the user space mutex state is
|
||||
* consistent, so a woken waiter will just take over the
|
||||
* uncontended futex. Setting the OWNER_DIED bit would create
|
||||
* inconsistent state and malfunction of the user space owner died
|
||||
* handling.
|
||||
*/
|
||||
if (pending_op && !pi && !uval) {
|
||||
futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr))
|
||||
return 0;
|
||||
|
||||
@@ -3547,7 +3674,7 @@ static inline int fetch_robust_entry(struct robust_list __user **entry,
|
||||
*
|
||||
* We silently return on any sign of list-walking problem.
|
||||
*/
|
||||
void exit_robust_list(struct task_struct *curr)
|
||||
static void exit_robust_list(struct task_struct *curr)
|
||||
{
|
||||
struct robust_list_head __user *head = curr->robust_list;
|
||||
struct robust_list __user *entry, *next_entry, *pending;
|
||||
@@ -3588,10 +3715,11 @@ void exit_robust_list(struct task_struct *curr)
|
||||
* A pending lock might already be on the list, so
|
||||
* don't process it twice:
|
||||
*/
|
||||
if (entry != pending)
|
||||
if (entry != pending) {
|
||||
if (handle_futex_death((void __user *)entry + futex_offset,
|
||||
curr, pi))
|
||||
curr, pi, HANDLE_DEATH_LIST))
|
||||
return;
|
||||
}
|
||||
if (rc)
|
||||
return;
|
||||
entry = next_entry;
|
||||
@@ -3605,9 +3733,118 @@ void exit_robust_list(struct task_struct *curr)
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
if (pending)
|
||||
if (pending) {
|
||||
handle_futex_death((void __user *)pending + futex_offset,
|
||||
curr, pip);
|
||||
curr, pip, HANDLE_DEATH_PENDING);
|
||||
}
|
||||
}
|
||||
|
||||
static void futex_cleanup(struct task_struct *tsk)
|
||||
{
|
||||
if (unlikely(tsk->robust_list)) {
|
||||
exit_robust_list(tsk);
|
||||
tsk->robust_list = NULL;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
if (unlikely(tsk->compat_robust_list)) {
|
||||
compat_exit_robust_list(tsk);
|
||||
tsk->compat_robust_list = NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (unlikely(!list_empty(&tsk->pi_state_list)))
|
||||
exit_pi_state_list(tsk);
|
||||
}
|
||||
|
||||
/**
|
||||
* futex_exit_recursive - Set the tasks futex state to FUTEX_STATE_DEAD
|
||||
* @tsk: task to set the state on
|
||||
*
|
||||
* Set the futex exit state of the task lockless. The futex waiter code
|
||||
* observes that state when a task is exiting and loops until the task has
|
||||
* actually finished the futex cleanup. The worst case for this is that the
|
||||
* waiter runs through the wait loop until the state becomes visible.
|
||||
*
|
||||
* This is called from the recursive fault handling path in do_exit().
|
||||
*
|
||||
* This is best effort. Either the futex exit code has run already or
|
||||
* not. If the OWNER_DIED bit has been set on the futex then the waiter can
|
||||
* take it over. If not, the problem is pushed back to user space. If the
|
||||
* futex exit code did not run yet, then an already queued waiter might
|
||||
* block forever, but there is nothing which can be done about that.
|
||||
*/
|
||||
void futex_exit_recursive(struct task_struct *tsk)
|
||||
{
|
||||
/* If the state is FUTEX_STATE_EXITING then futex_exit_mutex is held */
|
||||
if (tsk->futex_state == FUTEX_STATE_EXITING)
|
||||
mutex_unlock(&tsk->futex_exit_mutex);
|
||||
tsk->futex_state = FUTEX_STATE_DEAD;
|
||||
}
|
||||
|
||||
static void futex_cleanup_begin(struct task_struct *tsk)
|
||||
{
|
||||
/*
|
||||
* Prevent various race issues against a concurrent incoming waiter
|
||||
* including live locks by forcing the waiter to block on
|
||||
* tsk->futex_exit_mutex when it observes FUTEX_STATE_EXITING in
|
||||
* attach_to_pi_owner().
|
||||
*/
|
||||
mutex_lock(&tsk->futex_exit_mutex);
|
||||
|
||||
/*
|
||||
* Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock.
|
||||
*
|
||||
* This ensures that all subsequent checks of tsk->futex_state in
|
||||
* attach_to_pi_owner() must observe FUTEX_STATE_EXITING with
|
||||
* tsk->pi_lock held.
|
||||
*
|
||||
* It guarantees also that a pi_state which was queued right before
|
||||
* the state change under tsk->pi_lock by a concurrent waiter must
|
||||
* be observed in exit_pi_state_list().
|
||||
*/
|
||||
raw_spin_lock_irq(&tsk->pi_lock);
|
||||
tsk->futex_state = FUTEX_STATE_EXITING;
|
||||
raw_spin_unlock_irq(&tsk->pi_lock);
|
||||
}
|
||||
|
||||
static void futex_cleanup_end(struct task_struct *tsk, int state)
|
||||
{
|
||||
/*
|
||||
* Lockless store. The only side effect is that an observer might
|
||||
* take another loop until it becomes visible.
|
||||
*/
|
||||
tsk->futex_state = state;
|
||||
/*
|
||||
* Drop the exit protection. This unblocks waiters which observed
|
||||
* FUTEX_STATE_EXITING to reevaluate the state.
|
||||
*/
|
||||
mutex_unlock(&tsk->futex_exit_mutex);
|
||||
}
|
||||
|
||||
void futex_exec_release(struct task_struct *tsk)
|
||||
{
|
||||
/*
|
||||
* The state handling is done for consistency, but in the case of
|
||||
* exec() there is no way to prevent futher damage as the PID stays
|
||||
* the same. But for the unlikely and arguably buggy case that a
|
||||
* futex is held on exec(), this provides at least as much state
|
||||
* consistency protection which is possible.
|
||||
*/
|
||||
futex_cleanup_begin(tsk);
|
||||
futex_cleanup(tsk);
|
||||
/*
|
||||
* Reset the state to FUTEX_STATE_OK. The task is alive and about
|
||||
* exec a new binary.
|
||||
*/
|
||||
futex_cleanup_end(tsk, FUTEX_STATE_OK);
|
||||
}
|
||||
|
||||
void futex_exit_release(struct task_struct *tsk)
|
||||
{
|
||||
futex_cleanup_begin(tsk);
|
||||
futex_cleanup(tsk);
|
||||
futex_cleanup_end(tsk, FUTEX_STATE_DEAD);
|
||||
}
|
||||
|
||||
long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
|
||||
@@ -3737,7 +3974,7 @@ static void __user *futex_uaddr(struct robust_list __user *entry,
|
||||
*
|
||||
* We silently return on any sign of list-walking problem.
|
||||
*/
|
||||
void compat_exit_robust_list(struct task_struct *curr)
|
||||
static void compat_exit_robust_list(struct task_struct *curr)
|
||||
{
|
||||
struct compat_robust_list_head __user *head = curr->compat_robust_list;
|
||||
struct robust_list __user *entry, *next_entry, *pending;
|
||||
@@ -3784,7 +4021,8 @@ void compat_exit_robust_list(struct task_struct *curr)
|
||||
if (entry != pending) {
|
||||
void __user *uaddr = futex_uaddr(entry, futex_offset);
|
||||
|
||||
if (handle_futex_death(uaddr, curr, pi))
|
||||
if (handle_futex_death(uaddr, curr, pi,
|
||||
HANDLE_DEATH_LIST))
|
||||
return;
|
||||
}
|
||||
if (rc)
|
||||
@@ -3803,7 +4041,7 @@ void compat_exit_robust_list(struct task_struct *curr)
|
||||
if (pending) {
|
||||
void __user *uaddr = futex_uaddr(pending, futex_offset);
|
||||
|
||||
handle_futex_death(uaddr, curr, pip);
|
||||
handle_futex_death(uaddr, curr, pip, HANDLE_DEATH_PENDING);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -4208,11 +4208,9 @@ static int __lock_downgrade(struct lockdep_map *lock, unsigned long ip)
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove the lock to the list of currently held locks - this gets
|
||||
* Remove the lock from the list of currently held locks - this gets
|
||||
* called on mutex_unlock()/spin_unlock*() (or on a failed
|
||||
* mutex_lock_interruptible()).
|
||||
*
|
||||
* @nested is an hysterical artifact, needs a tree wide cleanup.
|
||||
*/
|
||||
static int
|
||||
__lock_release(struct lockdep_map *lock, unsigned long ip)
|
||||
@@ -4491,8 +4489,7 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(lock_acquire);
|
||||
|
||||
void lock_release(struct lockdep_map *lock, int nested,
|
||||
unsigned long ip)
|
||||
void lock_release(struct lockdep_map *lock, unsigned long ip)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
|
@@ -733,6 +733,9 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
|
||||
*/
|
||||
void __sched mutex_unlock(struct mutex *lock)
|
||||
{
|
||||
#ifdef CONFIG_DEBUG_MUTEXES
|
||||
WARN_ON(in_interrupt());
|
||||
#endif
|
||||
#ifndef CONFIG_DEBUG_LOCK_ALLOC
|
||||
if (__mutex_unlock_fast(lock))
|
||||
return;
|
||||
@@ -1091,7 +1094,7 @@ err:
|
||||
err_early_kill:
|
||||
spin_unlock(&lock->wait_lock);
|
||||
debug_mutex_free_waiter(&waiter);
|
||||
mutex_release(&lock->dep_map, 1, ip);
|
||||
mutex_release(&lock->dep_map, ip);
|
||||
preempt_enable();
|
||||
return ret;
|
||||
}
|
||||
@@ -1225,7 +1228,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
|
||||
DEFINE_WAKE_Q(wake_q);
|
||||
unsigned long owner;
|
||||
|
||||
mutex_release(&lock->dep_map, 1, ip);
|
||||
mutex_release(&lock->dep_map, ip);
|
||||
|
||||
/*
|
||||
* Release the lock before (potentially) taking the spinlock such that
|
||||
@@ -1413,6 +1416,7 @@ int __sched mutex_trylock(struct mutex *lock)
|
||||
|
||||
#ifdef CONFIG_DEBUG_MUTEXES
|
||||
DEBUG_LOCKS_WARN_ON(lock->magic != lock);
|
||||
WARN_ON(in_interrupt());
|
||||
#endif
|
||||
|
||||
locked = __mutex_trylock(lock);
|
||||
|
@@ -1517,7 +1517,7 @@ int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)
|
||||
mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
|
||||
ret = rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock);
|
||||
if (ret)
|
||||
mutex_release(&lock->dep_map, 1, _RET_IP_);
|
||||
mutex_release(&lock->dep_map, _RET_IP_);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -1561,7 +1561,7 @@ rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout)
|
||||
RT_MUTEX_MIN_CHAINWALK,
|
||||
rt_mutex_slowlock);
|
||||
if (ret)
|
||||
mutex_release(&lock->dep_map, 1, _RET_IP_);
|
||||
mutex_release(&lock->dep_map, _RET_IP_);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -1600,7 +1600,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_trylock);
|
||||
*/
|
||||
void __sched rt_mutex_unlock(struct rt_mutex *lock)
|
||||
{
|
||||
mutex_release(&lock->dep_map, 1, _RET_IP_);
|
||||
mutex_release(&lock->dep_map, _RET_IP_);
|
||||
rt_mutex_fastunlock(lock, rt_mutex_slowunlock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rt_mutex_unlock);
|
||||
|
@@ -1504,7 +1504,7 @@ int __sched down_read_killable(struct rw_semaphore *sem)
|
||||
rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
|
||||
|
||||
if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_killable)) {
|
||||
rwsem_release(&sem->dep_map, 1, _RET_IP_);
|
||||
rwsem_release(&sem->dep_map, _RET_IP_);
|
||||
return -EINTR;
|
||||
}
|
||||
|
||||
@@ -1546,7 +1546,7 @@ int __sched down_write_killable(struct rw_semaphore *sem)
|
||||
|
||||
if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock,
|
||||
__down_write_killable)) {
|
||||
rwsem_release(&sem->dep_map, 1, _RET_IP_);
|
||||
rwsem_release(&sem->dep_map, _RET_IP_);
|
||||
return -EINTR;
|
||||
}
|
||||
|
||||
@@ -1573,7 +1573,7 @@ EXPORT_SYMBOL(down_write_trylock);
|
||||
*/
|
||||
void up_read(struct rw_semaphore *sem)
|
||||
{
|
||||
rwsem_release(&sem->dep_map, 1, _RET_IP_);
|
||||
rwsem_release(&sem->dep_map, _RET_IP_);
|
||||
__up_read(sem);
|
||||
}
|
||||
EXPORT_SYMBOL(up_read);
|
||||
@@ -1583,7 +1583,7 @@ EXPORT_SYMBOL(up_read);
|
||||
*/
|
||||
void up_write(struct rw_semaphore *sem)
|
||||
{
|
||||
rwsem_release(&sem->dep_map, 1, _RET_IP_);
|
||||
rwsem_release(&sem->dep_map, _RET_IP_);
|
||||
__up_write(sem);
|
||||
}
|
||||
EXPORT_SYMBOL(up_write);
|
||||
@@ -1639,7 +1639,7 @@ int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass)
|
||||
|
||||
if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock,
|
||||
__down_write_killable)) {
|
||||
rwsem_release(&sem->dep_map, 1, _RET_IP_);
|
||||
rwsem_release(&sem->dep_map, _RET_IP_);
|
||||
return -EINTR;
|
||||
}
|
||||
|
||||
|
@@ -671,17 +671,6 @@ EXPORT_SYMBOL(__stack_chk_fail);
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_ARCH_HAS_REFCOUNT
|
||||
void refcount_error_report(struct pt_regs *regs, const char *err)
|
||||
{
|
||||
WARN_RATELIMIT(1, "refcount_t %s at %pB in %s[%d], uid/euid: %u/%u\n",
|
||||
err, (void *)instruction_pointer(regs),
|
||||
current->comm, task_pid_nr(current),
|
||||
from_kuid_munged(&init_user_ns, current_uid()),
|
||||
from_kuid_munged(&init_user_ns, current_euid()));
|
||||
}
|
||||
#endif
|
||||
|
||||
core_param(panic, panic_timeout, int, 0644);
|
||||
core_param(panic_print, panic_print, ulong, 0644);
|
||||
core_param(pause_on_oops, pause_on_oops, int, 0644);
|
||||
|
@@ -248,7 +248,7 @@ static void __up_console_sem(unsigned long ip)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
mutex_release(&console_lock_dep_map, 1, ip);
|
||||
mutex_release(&console_lock_dep_map, ip);
|
||||
|
||||
printk_safe_enter_irqsave(flags);
|
||||
up(&console_sem);
|
||||
@@ -1679,20 +1679,20 @@ static int console_lock_spinning_disable_and_check(void)
|
||||
raw_spin_unlock(&console_owner_lock);
|
||||
|
||||
if (!waiter) {
|
||||
spin_release(&console_owner_dep_map, 1, _THIS_IP_);
|
||||
spin_release(&console_owner_dep_map, _THIS_IP_);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* The waiter is now free to continue */
|
||||
WRITE_ONCE(console_waiter, false);
|
||||
|
||||
spin_release(&console_owner_dep_map, 1, _THIS_IP_);
|
||||
spin_release(&console_owner_dep_map, _THIS_IP_);
|
||||
|
||||
/*
|
||||
* Hand off console_lock to waiter. The waiter will perform
|
||||
* the up(). After this, the waiter is the console_lock owner.
|
||||
*/
|
||||
mutex_release(&console_lock_dep_map, 1, _THIS_IP_);
|
||||
mutex_release(&console_lock_dep_map, _THIS_IP_);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -1746,7 +1746,7 @@ static int console_trylock_spinning(void)
|
||||
/* Owner will clear console_waiter on hand off */
|
||||
while (READ_ONCE(console_waiter))
|
||||
cpu_relax();
|
||||
spin_release(&console_owner_dep_map, 1, _THIS_IP_);
|
||||
spin_release(&console_owner_dep_map, _THIS_IP_);
|
||||
|
||||
printk_safe_exit_irqrestore(flags);
|
||||
/*
|
||||
|
@@ -3106,7 +3106,7 @@ prepare_lock_switch(struct rq *rq, struct task_struct *next, struct rq_flags *rf
|
||||
* do an early lockdep release here:
|
||||
*/
|
||||
rq_unpin_lock(rq, rf);
|
||||
spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
|
||||
spin_release(&rq->lock.dep_map, _THIS_IP_);
|
||||
#ifdef CONFIG_DEBUG_SPINLOCK
|
||||
/* this is a valid case when another task releases the spinlock */
|
||||
rq->lock.owner = next;
|