Merge tag 'v3.16-rc5' into sched/core, to refresh the branch before applying bigger tree-wide changes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
@@ -1648,10 +1648,13 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
|
||||
int flags, const char *unused_dev_name,
|
||||
void *data)
|
||||
{
|
||||
struct super_block *pinned_sb = NULL;
|
||||
struct cgroup_subsys *ss;
|
||||
struct cgroup_root *root;
|
||||
struct cgroup_sb_opts opts;
|
||||
struct dentry *dentry;
|
||||
int ret;
|
||||
int i;
|
||||
bool new_sb;
|
||||
|
||||
/*
|
||||
@@ -1677,6 +1680,27 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
/*
|
||||
* Destruction of cgroup root is asynchronous, so subsystems may
|
||||
* still be dying after the previous unmount. Let's drain the
|
||||
* dying subsystems. We just need to ensure that the ones
|
||||
* unmounted previously finish dying and don't care about new ones
|
||||
* starting. Testing ref liveliness is good enough.
|
||||
*/
|
||||
for_each_subsys(ss, i) {
|
||||
if (!(opts.subsys_mask & (1 << i)) ||
|
||||
ss->root == &cgrp_dfl_root)
|
||||
continue;
|
||||
|
||||
if (!percpu_ref_tryget_live(&ss->root->cgrp.self.refcnt)) {
|
||||
mutex_unlock(&cgroup_mutex);
|
||||
msleep(10);
|
||||
ret = restart_syscall();
|
||||
goto out_free;
|
||||
}
|
||||
cgroup_put(&ss->root->cgrp);
|
||||
}
|
||||
|
||||
for_each_root(root) {
|
||||
bool name_match = false;
|
||||
|
||||
@@ -1717,15 +1741,23 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
|
||||
}
|
||||
|
||||
/*
|
||||
* A root's lifetime is governed by its root cgroup.
|
||||
* tryget_live failure indicate that the root is being
|
||||
* destroyed. Wait for destruction to complete so that the
|
||||
* subsystems are free. We can use wait_queue for the wait
|
||||
* but this path is super cold. Let's just sleep for a bit
|
||||
* and retry.
|
||||
* We want to reuse @root whose lifetime is governed by its
|
||||
* ->cgrp. Let's check whether @root is alive and keep it
|
||||
* that way. As cgroup_kill_sb() can happen anytime, we
|
||||
* want to block it by pinning the sb so that @root doesn't
|
||||
* get killed before mount is complete.
|
||||
*
|
||||
* With the sb pinned, tryget_live can reliably indicate
|
||||
* whether @root can be reused. If it's being killed,
|
||||
* drain it. We can use wait_queue for the wait but this
|
||||
* path is super cold. Let's just sleep a bit and retry.
|
||||
*/
|
||||
if (!percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
|
||||
pinned_sb = kernfs_pin_sb(root->kf_root, NULL);
|
||||
if (IS_ERR(pinned_sb) ||
|
||||
!percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
|
||||
mutex_unlock(&cgroup_mutex);
|
||||
if (!IS_ERR_OR_NULL(pinned_sb))
|
||||
deactivate_super(pinned_sb);
|
||||
msleep(10);
|
||||
ret = restart_syscall();
|
||||
goto out_free;
|
||||
@@ -1770,6 +1802,16 @@ out_free:
|
||||
CGROUP_SUPER_MAGIC, &new_sb);
|
||||
if (IS_ERR(dentry) || !new_sb)
|
||||
cgroup_put(&root->cgrp);
|
||||
|
||||
/*
|
||||
* If @pinned_sb, we're reusing an existing root and holding an
|
||||
* extra ref on its sb. Mount is complete. Put the extra ref.
|
||||
*/
|
||||
if (pinned_sb) {
|
||||
WARN_ON(new_sb);
|
||||
deactivate_super(pinned_sb);
|
||||
}
|
||||
|
||||
return dentry;
|
||||
}
|
||||
|
||||
@@ -3328,7 +3370,7 @@ bool css_has_online_children(struct cgroup_subsys_state *css)
|
||||
|
||||
rcu_read_lock();
|
||||
css_for_each_child(child, css) {
|
||||
if (css->flags & CSS_ONLINE) {
|
||||
if (child->flags & CSS_ONLINE) {
|
||||
ret = true;
|
||||
break;
|
||||
}
|
||||
|
@@ -19,6 +19,7 @@
|
||||
#include <linux/sched.h>
|
||||
#include <linux/hardirq.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/kprobes.h>
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/context_tracking.h>
|
||||
@@ -104,6 +105,7 @@ void context_tracking_user_enter(void)
|
||||
}
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
NOKPROBE_SYMBOL(context_tracking_user_enter);
|
||||
|
||||
#ifdef CONFIG_PREEMPT
|
||||
/**
|
||||
@@ -181,6 +183,7 @@ void context_tracking_user_exit(void)
|
||||
}
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
NOKPROBE_SYMBOL(context_tracking_user_exit);
|
||||
|
||||
/**
|
||||
* __context_tracking_task_switch - context switch the syscall callbacks
|
||||
|
@@ -1181,7 +1181,13 @@ done:
|
||||
|
||||
int current_cpuset_is_being_rebound(void)
|
||||
{
|
||||
return task_cs(current) == cpuset_being_rebound;
|
||||
int ret;
|
||||
|
||||
rcu_read_lock();
|
||||
ret = task_cs(current) == cpuset_being_rebound;
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int update_relax_domain_level(struct cpuset *cs, s64 val)
|
||||
@@ -1617,7 +1623,17 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
|
||||
* resources, wait for the previously scheduled operations before
|
||||
* proceeding, so that we don't end up keep removing tasks added
|
||||
* after execution capability is restored.
|
||||
*
|
||||
* cpuset_hotplug_work calls back into cgroup core via
|
||||
* cgroup_transfer_tasks() and waiting for it from a cgroupfs
|
||||
* operation like this one can lead to a deadlock through kernfs
|
||||
* active_ref protection. Let's break the protection. Losing the
|
||||
* protection is okay as we check whether @cs is online after
|
||||
* grabbing cpuset_mutex anyway. This only happens on the legacy
|
||||
* hierarchies.
|
||||
*/
|
||||
css_get(&cs->css);
|
||||
kernfs_break_active_protection(of->kn);
|
||||
flush_work(&cpuset_hotplug_work);
|
||||
|
||||
mutex_lock(&cpuset_mutex);
|
||||
@@ -1645,6 +1661,8 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
|
||||
free_trial_cpuset(trialcs);
|
||||
out_unlock:
|
||||
mutex_unlock(&cpuset_mutex);
|
||||
kernfs_unbreak_active_protection(of->kn);
|
||||
css_put(&cs->css);
|
||||
return retval ?: nbytes;
|
||||
}
|
||||
|
||||
|
@@ -40,6 +40,7 @@
|
||||
#include <linux/mm_types.h>
|
||||
#include <linux/cgroup.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/mman.h>
|
||||
|
||||
#include "internal.h"
|
||||
|
||||
@@ -5128,6 +5129,7 @@ struct perf_mmap_event {
|
||||
int maj, min;
|
||||
u64 ino;
|
||||
u64 ino_generation;
|
||||
u32 prot, flags;
|
||||
|
||||
struct {
|
||||
struct perf_event_header header;
|
||||
@@ -5169,6 +5171,8 @@ static void perf_event_mmap_output(struct perf_event *event,
|
||||
mmap_event->event_id.header.size += sizeof(mmap_event->min);
|
||||
mmap_event->event_id.header.size += sizeof(mmap_event->ino);
|
||||
mmap_event->event_id.header.size += sizeof(mmap_event->ino_generation);
|
||||
mmap_event->event_id.header.size += sizeof(mmap_event->prot);
|
||||
mmap_event->event_id.header.size += sizeof(mmap_event->flags);
|
||||
}
|
||||
|
||||
perf_event_header__init_id(&mmap_event->event_id.header, &sample, event);
|
||||
@@ -5187,6 +5191,8 @@ static void perf_event_mmap_output(struct perf_event *event,
|
||||
perf_output_put(&handle, mmap_event->min);
|
||||
perf_output_put(&handle, mmap_event->ino);
|
||||
perf_output_put(&handle, mmap_event->ino_generation);
|
||||
perf_output_put(&handle, mmap_event->prot);
|
||||
perf_output_put(&handle, mmap_event->flags);
|
||||
}
|
||||
|
||||
__output_copy(&handle, mmap_event->file_name,
|
||||
@@ -5205,6 +5211,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
|
||||
struct file *file = vma->vm_file;
|
||||
int maj = 0, min = 0;
|
||||
u64 ino = 0, gen = 0;
|
||||
u32 prot = 0, flags = 0;
|
||||
unsigned int size;
|
||||
char tmp[16];
|
||||
char *buf = NULL;
|
||||
@@ -5235,6 +5242,28 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
|
||||
gen = inode->i_generation;
|
||||
maj = MAJOR(dev);
|
||||
min = MINOR(dev);
|
||||
|
||||
if (vma->vm_flags & VM_READ)
|
||||
prot |= PROT_READ;
|
||||
if (vma->vm_flags & VM_WRITE)
|
||||
prot |= PROT_WRITE;
|
||||
if (vma->vm_flags & VM_EXEC)
|
||||
prot |= PROT_EXEC;
|
||||
|
||||
if (vma->vm_flags & VM_MAYSHARE)
|
||||
flags = MAP_SHARED;
|
||||
else
|
||||
flags = MAP_PRIVATE;
|
||||
|
||||
if (vma->vm_flags & VM_DENYWRITE)
|
||||
flags |= MAP_DENYWRITE;
|
||||
if (vma->vm_flags & VM_MAYEXEC)
|
||||
flags |= MAP_EXECUTABLE;
|
||||
if (vma->vm_flags & VM_LOCKED)
|
||||
flags |= MAP_LOCKED;
|
||||
if (vma->vm_flags & VM_HUGETLB)
|
||||
flags |= MAP_HUGETLB;
|
||||
|
||||
goto got_name;
|
||||
} else {
|
||||
name = (char *)arch_vma_name(vma);
|
||||
@@ -5275,6 +5304,8 @@ got_name:
|
||||
mmap_event->min = min;
|
||||
mmap_event->ino = ino;
|
||||
mmap_event->ino_generation = gen;
|
||||
mmap_event->prot = prot;
|
||||
mmap_event->flags = flags;
|
||||
|
||||
if (!(vma->vm_flags & VM_EXEC))
|
||||
mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_DATA;
|
||||
@@ -5315,6 +5346,8 @@ void perf_event_mmap(struct vm_area_struct *vma)
|
||||
/* .min (attr_mmap2 only) */
|
||||
/* .ino (attr_mmap2 only) */
|
||||
/* .ino_generation (attr_mmap2 only) */
|
||||
/* .prot (attr_mmap2 only) */
|
||||
/* .flags (attr_mmap2 only) */
|
||||
};
|
||||
|
||||
perf_event_mmap_event(&mmap_event);
|
||||
@@ -6897,10 +6930,6 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
|
||||
if (ret)
|
||||
return -EFAULT;
|
||||
|
||||
/* disabled for now */
|
||||
if (attr->mmap2)
|
||||
return -EINVAL;
|
||||
|
||||
if (attr->__reserved_1)
|
||||
return -EINVAL;
|
||||
|
||||
|
@@ -846,7 +846,7 @@ static void __uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *u
|
||||
{
|
||||
int err;
|
||||
|
||||
if (!consumer_del(uprobe, uc)) /* WARN? */
|
||||
if (WARN_ON(!consumer_del(uprobe, uc)))
|
||||
return;
|
||||
|
||||
err = register_for_each_vma(uprobe, NULL);
|
||||
@@ -927,7 +927,7 @@ int uprobe_apply(struct inode *inode, loff_t offset,
|
||||
int ret = -ENOENT;
|
||||
|
||||
uprobe = find_uprobe(inode, offset);
|
||||
if (!uprobe)
|
||||
if (WARN_ON(!uprobe))
|
||||
return ret;
|
||||
|
||||
down_write(&uprobe->register_rwsem);
|
||||
@@ -952,7 +952,7 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume
|
||||
struct uprobe *uprobe;
|
||||
|
||||
uprobe = find_uprobe(inode, offset);
|
||||
if (!uprobe)
|
||||
if (WARN_ON(!uprobe))
|
||||
return;
|
||||
|
||||
down_write(&uprobe->register_rwsem);
|
||||
|
@@ -1486,7 +1486,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
|
||||
|
||||
total_forks++;
|
||||
spin_unlock(¤t->sighand->siglock);
|
||||
syscall_tracepoint_update(p);
|
||||
write_unlock_irq(&tasklist_lock);
|
||||
|
||||
proc_fork_connector(p);
|
||||
cgroup_post_fork(p);
|
||||
if (clone_flags & CLONE_THREAD)
|
||||
|
@@ -455,9 +455,9 @@ EXPORT_SYMBOL_GPL(irq_alloc_hwirqs);
|
||||
*/
|
||||
void irq_free_hwirqs(unsigned int from, int cnt)
|
||||
{
|
||||
int i;
|
||||
int i, j;
|
||||
|
||||
for (i = from; cnt > 0; i++, cnt--) {
|
||||
for (i = from, j = cnt; j > 0; i++, j--) {
|
||||
irq_set_status_flags(i, _IRQ_NOREQUEST | _IRQ_NOPROBE);
|
||||
arch_teardown_hwirq(i);
|
||||
}
|
||||
|
@@ -1617,6 +1617,7 @@ static int __init crash_save_vmcoreinfo_init(void)
|
||||
#ifdef CONFIG_MEMORY_FAILURE
|
||||
VMCOREINFO_NUMBER(PG_hwpoison);
|
||||
#endif
|
||||
VMCOREINFO_NUMBER(PG_head_mask);
|
||||
VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
|
||||
|
||||
arch_crash_save_vmcoreinfo();
|
||||
|
@@ -31,3 +31,8 @@ static inline int debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *waiter,
|
||||
{
|
||||
return (waiter != NULL);
|
||||
}
|
||||
|
||||
static inline void rt_mutex_print_deadlock(struct rt_mutex_waiter *w)
|
||||
{
|
||||
debug_rt_mutex_print_deadlock(w);
|
||||
}
|
||||
|
@@ -83,6 +83,47 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
|
||||
owner = *p;
|
||||
} while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner);
|
||||
}
|
||||
|
||||
/*
|
||||
* Safe fastpath aware unlock:
|
||||
* 1) Clear the waiters bit
|
||||
* 2) Drop lock->wait_lock
|
||||
* 3) Try to unlock the lock with cmpxchg
|
||||
*/
|
||||
static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
|
||||
__releases(lock->wait_lock)
|
||||
{
|
||||
struct task_struct *owner = rt_mutex_owner(lock);
|
||||
|
||||
clear_rt_mutex_waiters(lock);
|
||||
raw_spin_unlock(&lock->wait_lock);
|
||||
/*
|
||||
* If a new waiter comes in between the unlock and the cmpxchg
|
||||
* we have two situations:
|
||||
*
|
||||
* unlock(wait_lock);
|
||||
* lock(wait_lock);
|
||||
* cmpxchg(p, owner, 0) == owner
|
||||
* mark_rt_mutex_waiters(lock);
|
||||
* acquire(lock);
|
||||
* or:
|
||||
*
|
||||
* unlock(wait_lock);
|
||||
* lock(wait_lock);
|
||||
* mark_rt_mutex_waiters(lock);
|
||||
*
|
||||
* cmpxchg(p, owner, 0) != owner
|
||||
* enqueue_waiter();
|
||||
* unlock(wait_lock);
|
||||
* lock(wait_lock);
|
||||
* wake waiter();
|
||||
* unlock(wait_lock);
|
||||
* lock(wait_lock);
|
||||
* acquire(lock);
|
||||
*/
|
||||
return rt_mutex_cmpxchg(lock, owner, NULL);
|
||||
}
|
||||
|
||||
#else
|
||||
# define rt_mutex_cmpxchg(l,c,n) (0)
|
||||
static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
|
||||
@@ -90,6 +131,17 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
|
||||
lock->owner = (struct task_struct *)
|
||||
((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
|
||||
}
|
||||
|
||||
/*
|
||||
* Simple slow path only version: lock->owner is protected by lock->wait_lock.
|
||||
*/
|
||||
static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
|
||||
__releases(lock->wait_lock)
|
||||
{
|
||||
lock->owner = NULL;
|
||||
raw_spin_unlock(&lock->wait_lock);
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline int
|
||||
@@ -260,27 +312,36 @@ static void rt_mutex_adjust_prio(struct task_struct *task)
|
||||
*/
|
||||
int max_lock_depth = 1024;
|
||||
|
||||
static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
|
||||
{
|
||||
return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Adjust the priority chain. Also used for deadlock detection.
|
||||
* Decreases task's usage by one - may thus free the task.
|
||||
*
|
||||
* @task: the task owning the mutex (owner) for which a chain walk is probably
|
||||
* needed
|
||||
* @task: the task owning the mutex (owner) for which a chain walk is
|
||||
* probably needed
|
||||
* @deadlock_detect: do we have to carry out deadlock detection?
|
||||
* @orig_lock: the mutex (can be NULL if we are walking the chain to recheck
|
||||
* things for a task that has just got its priority adjusted, and
|
||||
* is waiting on a mutex)
|
||||
* @orig_lock: the mutex (can be NULL if we are walking the chain to recheck
|
||||
* things for a task that has just got its priority adjusted, and
|
||||
* is waiting on a mutex)
|
||||
* @next_lock: the mutex on which the owner of @orig_lock was blocked before
|
||||
* we dropped its pi_lock. Is never dereferenced, only used for
|
||||
* comparison to detect lock chain changes.
|
||||
* @orig_waiter: rt_mutex_waiter struct for the task that has just donated
|
||||
* its priority to the mutex owner (can be NULL in the case
|
||||
* depicted above or if the top waiter is gone away and we are
|
||||
* actually deboosting the owner)
|
||||
* @top_task: the current top waiter
|
||||
* its priority to the mutex owner (can be NULL in the case
|
||||
* depicted above or if the top waiter is gone away and we are
|
||||
* actually deboosting the owner)
|
||||
* @top_task: the current top waiter
|
||||
*
|
||||
* Returns 0 or -EDEADLK.
|
||||
*/
|
||||
static int rt_mutex_adjust_prio_chain(struct task_struct *task,
|
||||
int deadlock_detect,
|
||||
struct rt_mutex *orig_lock,
|
||||
struct rt_mutex *next_lock,
|
||||
struct rt_mutex_waiter *orig_waiter,
|
||||
struct task_struct *top_task)
|
||||
{
|
||||
@@ -314,7 +375,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
|
||||
}
|
||||
put_task_struct(task);
|
||||
|
||||
return deadlock_detect ? -EDEADLK : 0;
|
||||
return -EDEADLK;
|
||||
}
|
||||
retry:
|
||||
/*
|
||||
@@ -338,6 +399,18 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
|
||||
if (orig_waiter && !rt_mutex_owner(orig_lock))
|
||||
goto out_unlock_pi;
|
||||
|
||||
/*
|
||||
* We dropped all locks after taking a refcount on @task, so
|
||||
* the task might have moved on in the lock chain or even left
|
||||
* the chain completely and blocks now on an unrelated lock or
|
||||
* on @orig_lock.
|
||||
*
|
||||
* We stored the lock on which @task was blocked in @next_lock,
|
||||
* so we can detect the chain change.
|
||||
*/
|
||||
if (next_lock != waiter->lock)
|
||||
goto out_unlock_pi;
|
||||
|
||||
/*
|
||||
* Drop out, when the task has no waiters. Note,
|
||||
* top_waiter can be NULL, when we are in the deboosting
|
||||
@@ -377,7 +450,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
|
||||
if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
|
||||
debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock);
|
||||
raw_spin_unlock(&lock->wait_lock);
|
||||
ret = deadlock_detect ? -EDEADLK : 0;
|
||||
ret = -EDEADLK;
|
||||
goto out_unlock_pi;
|
||||
}
|
||||
|
||||
@@ -422,11 +495,26 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
|
||||
__rt_mutex_adjust_prio(task);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check whether the task which owns the current lock is pi
|
||||
* blocked itself. If yes we store a pointer to the lock for
|
||||
* the lock chain change detection above. After we dropped
|
||||
* task->pi_lock next_lock cannot be dereferenced anymore.
|
||||
*/
|
||||
next_lock = task_blocked_on_lock(task);
|
||||
|
||||
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
|
||||
|
||||
top_waiter = rt_mutex_top_waiter(lock);
|
||||
raw_spin_unlock(&lock->wait_lock);
|
||||
|
||||
/*
|
||||
* We reached the end of the lock chain. Stop right here. No
|
||||
* point to go back just to figure that out.
|
||||
*/
|
||||
if (!next_lock)
|
||||
goto out_put_task;
|
||||
|
||||
if (!detect_deadlock && waiter != top_waiter)
|
||||
goto out_put_task;
|
||||
|
||||
@@ -536,8 +624,9 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
|
||||
{
|
||||
struct task_struct *owner = rt_mutex_owner(lock);
|
||||
struct rt_mutex_waiter *top_waiter = waiter;
|
||||
unsigned long flags;
|
||||
struct rt_mutex *next_lock;
|
||||
int chain_walk = 0, res;
|
||||
unsigned long flags;
|
||||
|
||||
/*
|
||||
* Early deadlock detection. We really don't want the task to
|
||||
@@ -548,7 +637,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
|
||||
* which is wrong, as the other waiter is not in a deadlock
|
||||
* situation.
|
||||
*/
|
||||
if (detect_deadlock && owner == task)
|
||||
if (owner == task)
|
||||
return -EDEADLK;
|
||||
|
||||
raw_spin_lock_irqsave(&task->pi_lock, flags);
|
||||
@@ -569,20 +658,28 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
|
||||
if (!owner)
|
||||
return 0;
|
||||
|
||||
raw_spin_lock_irqsave(&owner->pi_lock, flags);
|
||||
if (waiter == rt_mutex_top_waiter(lock)) {
|
||||
raw_spin_lock_irqsave(&owner->pi_lock, flags);
|
||||
rt_mutex_dequeue_pi(owner, top_waiter);
|
||||
rt_mutex_enqueue_pi(owner, waiter);
|
||||
|
||||
__rt_mutex_adjust_prio(owner);
|
||||
if (owner->pi_blocked_on)
|
||||
chain_walk = 1;
|
||||
raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
|
||||
}
|
||||
else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock))
|
||||
} else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) {
|
||||
chain_walk = 1;
|
||||
}
|
||||
|
||||
if (!chain_walk)
|
||||
/* Store the lock on which owner is blocked or NULL */
|
||||
next_lock = task_blocked_on_lock(owner);
|
||||
|
||||
raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
|
||||
/*
|
||||
* Even if full deadlock detection is on, if the owner is not
|
||||
* blocked itself, we can avoid finding this out in the chain
|
||||
* walk.
|
||||
*/
|
||||
if (!chain_walk || !next_lock)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
@@ -594,8 +691,8 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
|
||||
|
||||
raw_spin_unlock(&lock->wait_lock);
|
||||
|
||||
res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter,
|
||||
task);
|
||||
res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock,
|
||||
next_lock, waiter, task);
|
||||
|
||||
raw_spin_lock(&lock->wait_lock);
|
||||
|
||||
@@ -605,7 +702,8 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
|
||||
/*
|
||||
* Wake up the next waiter on the lock.
|
||||
*
|
||||
* Remove the top waiter from the current tasks waiter list and wake it up.
|
||||
* Remove the top waiter from the current tasks pi waiter list and
|
||||
* wake it up.
|
||||
*
|
||||
* Called with lock->wait_lock held.
|
||||
*/
|
||||
@@ -626,10 +724,23 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
|
||||
*/
|
||||
rt_mutex_dequeue_pi(current, waiter);
|
||||
|
||||
rt_mutex_set_owner(lock, NULL);
|
||||
/*
|
||||
* As we are waking up the top waiter, and the waiter stays
|
||||
* queued on the lock until it gets the lock, this lock
|
||||
* obviously has waiters. Just set the bit here and this has
|
||||
* the added benefit of forcing all new tasks into the
|
||||
* slow path making sure no task of lower priority than
|
||||
* the top waiter can steal this lock.
|
||||
*/
|
||||
lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
|
||||
|
||||
raw_spin_unlock_irqrestore(¤t->pi_lock, flags);
|
||||
|
||||
/*
|
||||
* It's safe to dereference waiter as it cannot go away as
|
||||
* long as we hold lock->wait_lock. The waiter task needs to
|
||||
* acquire it in order to dequeue the waiter.
|
||||
*/
|
||||
wake_up_process(waiter->task);
|
||||
}
|
||||
|
||||
@@ -644,8 +755,8 @@ static void remove_waiter(struct rt_mutex *lock,
|
||||
{
|
||||
int first = (waiter == rt_mutex_top_waiter(lock));
|
||||
struct task_struct *owner = rt_mutex_owner(lock);
|
||||
struct rt_mutex *next_lock = NULL;
|
||||
unsigned long flags;
|
||||
int chain_walk = 0;
|
||||
|
||||
raw_spin_lock_irqsave(¤t->pi_lock, flags);
|
||||
rt_mutex_dequeue(lock, waiter);
|
||||
@@ -669,13 +780,13 @@ static void remove_waiter(struct rt_mutex *lock,
|
||||
}
|
||||
__rt_mutex_adjust_prio(owner);
|
||||
|
||||
if (owner->pi_blocked_on)
|
||||
chain_walk = 1;
|
||||
/* Store the lock on which owner is blocked or NULL */
|
||||
next_lock = task_blocked_on_lock(owner);
|
||||
|
||||
raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
|
||||
}
|
||||
|
||||
if (!chain_walk)
|
||||
if (!next_lock)
|
||||
return;
|
||||
|
||||
/* gets dropped in rt_mutex_adjust_prio_chain()! */
|
||||
@@ -683,7 +794,7 @@ static void remove_waiter(struct rt_mutex *lock,
|
||||
|
||||
raw_spin_unlock(&lock->wait_lock);
|
||||
|
||||
rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current);
|
||||
rt_mutex_adjust_prio_chain(owner, 0, lock, next_lock, NULL, current);
|
||||
|
||||
raw_spin_lock(&lock->wait_lock);
|
||||
}
|
||||
@@ -696,6 +807,7 @@ static void remove_waiter(struct rt_mutex *lock,
|
||||
void rt_mutex_adjust_pi(struct task_struct *task)
|
||||
{
|
||||
struct rt_mutex_waiter *waiter;
|
||||
struct rt_mutex *next_lock;
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock_irqsave(&task->pi_lock, flags);
|
||||
@@ -706,12 +818,13 @@ void rt_mutex_adjust_pi(struct task_struct *task)
|
||||
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
|
||||
return;
|
||||
}
|
||||
|
||||
next_lock = waiter->lock;
|
||||
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
|
||||
|
||||
/* gets dropped in rt_mutex_adjust_prio_chain()! */
|
||||
get_task_struct(task);
|
||||
rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task);
|
||||
|
||||
rt_mutex_adjust_prio_chain(task, 0, NULL, next_lock, NULL, task);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -763,6 +876,26 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void rt_mutex_handle_deadlock(int res, int detect_deadlock,
|
||||
struct rt_mutex_waiter *w)
|
||||
{
|
||||
/*
|
||||
* If the result is not -EDEADLOCK or the caller requested
|
||||
* deadlock detection, nothing to do here.
|
||||
*/
|
||||
if (res != -EDEADLOCK || detect_deadlock)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Yell lowdly and stop the task right here.
|
||||
*/
|
||||
rt_mutex_print_deadlock(w);
|
||||
while (1) {
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
schedule();
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Slow path lock function:
|
||||
*/
|
||||
@@ -802,8 +935,10 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
|
||||
|
||||
set_current_state(TASK_RUNNING);
|
||||
|
||||
if (unlikely(ret))
|
||||
if (unlikely(ret)) {
|
||||
remove_waiter(lock, &waiter);
|
||||
rt_mutex_handle_deadlock(ret, detect_deadlock, &waiter);
|
||||
}
|
||||
|
||||
/*
|
||||
* try_to_take_rt_mutex() sets the waiter bit
|
||||
@@ -859,12 +994,49 @@ rt_mutex_slowunlock(struct rt_mutex *lock)
|
||||
|
||||
rt_mutex_deadlock_account_unlock(current);
|
||||
|
||||
if (!rt_mutex_has_waiters(lock)) {
|
||||
lock->owner = NULL;
|
||||
raw_spin_unlock(&lock->wait_lock);
|
||||
return;
|
||||
/*
|
||||
* We must be careful here if the fast path is enabled. If we
|
||||
* have no waiters queued we cannot set owner to NULL here
|
||||
* because of:
|
||||
*
|
||||
* foo->lock->owner = NULL;
|
||||
* rtmutex_lock(foo->lock); <- fast path
|
||||
* free = atomic_dec_and_test(foo->refcnt);
|
||||
* rtmutex_unlock(foo->lock); <- fast path
|
||||
* if (free)
|
||||
* kfree(foo);
|
||||
* raw_spin_unlock(foo->lock->wait_lock);
|
||||
*
|
||||
* So for the fastpath enabled kernel:
|
||||
*
|
||||
* Nothing can set the waiters bit as long as we hold
|
||||
* lock->wait_lock. So we do the following sequence:
|
||||
*
|
||||
* owner = rt_mutex_owner(lock);
|
||||
* clear_rt_mutex_waiters(lock);
|
||||
* raw_spin_unlock(&lock->wait_lock);
|
||||
* if (cmpxchg(&lock->owner, owner, 0) == owner)
|
||||
* return;
|
||||
* goto retry;
|
||||
*
|
||||
* The fastpath disabled variant is simple as all access to
|
||||
* lock->owner is serialized by lock->wait_lock:
|
||||
*
|
||||
* lock->owner = NULL;
|
||||
* raw_spin_unlock(&lock->wait_lock);
|
||||
*/
|
||||
while (!rt_mutex_has_waiters(lock)) {
|
||||
/* Drops lock->wait_lock ! */
|
||||
if (unlock_rt_mutex_safe(lock) == true)
|
||||
return;
|
||||
/* Relock the rtmutex and try again */
|
||||
raw_spin_lock(&lock->wait_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* The wakeup next waiter path does not suffer from the above
|
||||
* race. See the comments there.
|
||||
*/
|
||||
wakeup_next_waiter(lock);
|
||||
|
||||
raw_spin_unlock(&lock->wait_lock);
|
||||
@@ -1112,7 +1284,8 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
|
||||
return 1;
|
||||
}
|
||||
|
||||
ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock);
|
||||
/* We enforce deadlock detection for futexes */
|
||||
ret = task_blocks_on_rt_mutex(lock, waiter, task, 1);
|
||||
|
||||
if (ret && !rt_mutex_owner(lock)) {
|
||||
/*
|
||||
|
@@ -24,3 +24,8 @@
|
||||
#define debug_rt_mutex_print_deadlock(w) do { } while (0)
|
||||
#define debug_rt_mutex_detect_deadlock(w,d) (d)
|
||||
#define debug_rt_mutex_reset_waiter(w) do { } while (0)
|
||||
|
||||
static inline void rt_mutex_print_deadlock(struct rt_mutex_waiter *w)
|
||||
{
|
||||
WARN(1, "rtmutex deadlock detected\n");
|
||||
}
|
||||
|
@@ -35,6 +35,7 @@
|
||||
|
||||
static int nocompress;
|
||||
static int noresume;
|
||||
static int nohibernate;
|
||||
static int resume_wait;
|
||||
static unsigned int resume_delay;
|
||||
static char resume_file[256] = CONFIG_PM_STD_PARTITION;
|
||||
@@ -62,6 +63,11 @@ bool freezer_test_done;
|
||||
|
||||
static const struct platform_hibernation_ops *hibernation_ops;
|
||||
|
||||
bool hibernation_available(void)
|
||||
{
|
||||
return (nohibernate == 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* hibernation_set_ops - Set the global hibernate operations.
|
||||
* @ops: Hibernation operations to use in subsequent hibernation transitions.
|
||||
@@ -642,6 +648,11 @@ int hibernate(void)
|
||||
{
|
||||
int error;
|
||||
|
||||
if (!hibernation_available()) {
|
||||
pr_debug("PM: Hibernation not available.\n");
|
||||
return -EPERM;
|
||||
}
|
||||
|
||||
lock_system_sleep();
|
||||
/* The snapshot device should not be opened while we're running */
|
||||
if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
|
||||
@@ -734,7 +745,7 @@ static int software_resume(void)
|
||||
/*
|
||||
* If the user said "noresume".. bail out early.
|
||||
*/
|
||||
if (noresume)
|
||||
if (noresume || !hibernation_available())
|
||||
return 0;
|
||||
|
||||
/*
|
||||
@@ -900,6 +911,9 @@ static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
int i;
|
||||
char *start = buf;
|
||||
|
||||
if (!hibernation_available())
|
||||
return sprintf(buf, "[disabled]\n");
|
||||
|
||||
for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) {
|
||||
if (!hibernation_modes[i])
|
||||
continue;
|
||||
@@ -934,6 +948,9 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
char *p;
|
||||
int mode = HIBERNATION_INVALID;
|
||||
|
||||
if (!hibernation_available())
|
||||
return -EPERM;
|
||||
|
||||
p = memchr(buf, '\n', n);
|
||||
len = p ? p - buf : n;
|
||||
|
||||
@@ -1101,6 +1118,10 @@ static int __init hibernate_setup(char *str)
|
||||
noresume = 1;
|
||||
else if (!strncmp(str, "nocompress", 10))
|
||||
nocompress = 1;
|
||||
else if (!strncmp(str, "no", 2)) {
|
||||
noresume = 1;
|
||||
nohibernate = 1;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -1125,9 +1146,23 @@ static int __init resumedelay_setup(char *str)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int __init nohibernate_setup(char *str)
|
||||
{
|
||||
noresume = 1;
|
||||
nohibernate = 1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int __init kaslr_nohibernate_setup(char *str)
|
||||
{
|
||||
return nohibernate_setup(str);
|
||||
}
|
||||
|
||||
__setup("noresume", noresume_setup);
|
||||
__setup("resume_offset=", resume_offset_setup);
|
||||
__setup("resume=", resume_setup);
|
||||
__setup("hibernate=", hibernate_setup);
|
||||
__setup("resumewait", resumewait_setup);
|
||||
__setup("resumedelay=", resumedelay_setup);
|
||||
__setup("nohibernate", nohibernate_setup);
|
||||
__setup("kaslr", kaslr_nohibernate_setup);
|
||||
|
@@ -300,13 +300,11 @@ static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
s += sprintf(s,"%s ", pm_states[i].label);
|
||||
|
||||
#endif
|
||||
#ifdef CONFIG_HIBERNATION
|
||||
s += sprintf(s, "%s\n", "disk");
|
||||
#else
|
||||
if (hibernation_available())
|
||||
s += sprintf(s, "disk ");
|
||||
if (s != buf)
|
||||
/* convert the last space to a newline */
|
||||
*(s-1) = '\n';
|
||||
#endif
|
||||
return (s - buf);
|
||||
}
|
||||
|
||||
|
@@ -49,6 +49,9 @@ static int snapshot_open(struct inode *inode, struct file *filp)
|
||||
struct snapshot_data *data;
|
||||
int error;
|
||||
|
||||
if (!hibernation_available())
|
||||
return -EPERM;
|
||||
|
||||
lock_system_sleep();
|
||||
|
||||
if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
|
||||
|
@@ -1416,9 +1416,10 @@ static int have_callable_console(void)
|
||||
/*
|
||||
* Can we actually use the console at this time on this cpu?
|
||||
*
|
||||
* Console drivers may assume that per-cpu resources have been allocated. So
|
||||
* unless they're explicitly marked as being able to cope (CON_ANYTIME) don't
|
||||
* call them until this CPU is officially up.
|
||||
* Console drivers may assume that per-cpu resources have
|
||||
* been allocated. So unless they're explicitly marked as
|
||||
* being able to cope (CON_ANYTIME) don't call them until
|
||||
* this CPU is officially up.
|
||||
*/
|
||||
static inline int can_use_console(unsigned int cpu)
|
||||
{
|
||||
@@ -1431,10 +1432,8 @@ static inline int can_use_console(unsigned int cpu)
|
||||
* console_lock held, and 'console_locked' set) if it
|
||||
* is successful, false otherwise.
|
||||
*/
|
||||
static int console_trylock_for_printk(void)
|
||||
static int console_trylock_for_printk(unsigned int cpu)
|
||||
{
|
||||
unsigned int cpu = smp_processor_id();
|
||||
|
||||
if (!console_trylock())
|
||||
return 0;
|
||||
/*
|
||||
@@ -1609,8 +1608,7 @@ asmlinkage int vprintk_emit(int facility, int level,
|
||||
*/
|
||||
if (!oops_in_progress && !lockdep_recursing(current)) {
|
||||
recursion_bug = 1;
|
||||
local_irq_restore(flags);
|
||||
return 0;
|
||||
goto out_restore_irqs;
|
||||
}
|
||||
zap_locks();
|
||||
}
|
||||
@@ -1718,27 +1716,21 @@ asmlinkage int vprintk_emit(int facility, int level,
|
||||
|
||||
logbuf_cpu = UINT_MAX;
|
||||
raw_spin_unlock(&logbuf_lock);
|
||||
lockdep_on();
|
||||
local_irq_restore(flags);
|
||||
|
||||
/* If called from the scheduler, we can not call up(). */
|
||||
if (in_sched)
|
||||
return printed_len;
|
||||
|
||||
/*
|
||||
* Disable preemption to avoid being preempted while holding
|
||||
* console_sem which would prevent anyone from printing to console
|
||||
*/
|
||||
preempt_disable();
|
||||
/*
|
||||
* Try to acquire and then immediately release the console semaphore.
|
||||
* The release will print out buffers and wake up /dev/kmsg and syslog()
|
||||
* users.
|
||||
*/
|
||||
if (console_trylock_for_printk())
|
||||
console_unlock();
|
||||
preempt_enable();
|
||||
if (!in_sched) {
|
||||
/*
|
||||
* Try to acquire and then immediately release the console
|
||||
* semaphore. The release will print out buffers and wake up
|
||||
* /dev/kmsg and syslog() users.
|
||||
*/
|
||||
if (console_trylock_for_printk(this_cpu))
|
||||
console_unlock();
|
||||
}
|
||||
|
||||
lockdep_on();
|
||||
out_restore_irqs:
|
||||
local_irq_restore(flags);
|
||||
return printed_len;
|
||||
}
|
||||
EXPORT_SYMBOL(vprintk_emit);
|
||||
|
57
kernel/smp.c
57
kernel/smp.c
@@ -30,6 +30,8 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_function_data, cfd_data);
|
||||
|
||||
static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue);
|
||||
|
||||
static void flush_smp_call_function_queue(bool warn_cpu_offline);
|
||||
|
||||
static int
|
||||
hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
|
||||
{
|
||||
@@ -52,12 +54,27 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
case CPU_UP_CANCELED:
|
||||
case CPU_UP_CANCELED_FROZEN:
|
||||
/* Fall-through to the CPU_DEAD[_FROZEN] case. */
|
||||
|
||||
case CPU_DEAD:
|
||||
case CPU_DEAD_FROZEN:
|
||||
free_cpumask_var(cfd->cpumask);
|
||||
free_percpu(cfd->csd);
|
||||
break;
|
||||
|
||||
case CPU_DYING:
|
||||
case CPU_DYING_FROZEN:
|
||||
/*
|
||||
* The IPIs for the smp-call-function callbacks queued by other
|
||||
* CPUs might arrive late, either due to hardware latencies or
|
||||
* because this CPU disabled interrupts (inside stop-machine)
|
||||
* before the IPIs were sent. So flush out any pending callbacks
|
||||
* explicitly (without waiting for the IPIs to arrive), to
|
||||
* ensure that the outgoing CPU doesn't go offline with work
|
||||
* still pending.
|
||||
*/
|
||||
flush_smp_call_function_queue(false);
|
||||
break;
|
||||
#endif
|
||||
};
|
||||
|
||||
@@ -178,23 +195,47 @@ static int generic_exec_single(int cpu, struct call_single_data *csd,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Invoked by arch to handle an IPI for call function single. Must be
|
||||
* called from the arch with interrupts disabled.
|
||||
/**
|
||||
* generic_smp_call_function_single_interrupt - Execute SMP IPI callbacks
|
||||
*
|
||||
* Invoked by arch to handle an IPI for call function single.
|
||||
* Must be called with interrupts disabled.
|
||||
*/
|
||||
void generic_smp_call_function_single_interrupt(void)
|
||||
{
|
||||
flush_smp_call_function_queue(true);
|
||||
}
|
||||
|
||||
/**
|
||||
* flush_smp_call_function_queue - Flush pending smp-call-function callbacks
|
||||
*
|
||||
* @warn_cpu_offline: If set to 'true', warn if callbacks were queued on an
|
||||
* offline CPU. Skip this check if set to 'false'.
|
||||
*
|
||||
* Flush any pending smp-call-function callbacks queued on this CPU. This is
|
||||
* invoked by the generic IPI handler, as well as by a CPU about to go offline,
|
||||
* to ensure that all pending IPI callbacks are run before it goes completely
|
||||
* offline.
|
||||
*
|
||||
* Loop through the call_single_queue and run all the queued callbacks.
|
||||
* Must be called with interrupts disabled.
|
||||
*/
|
||||
static void flush_smp_call_function_queue(bool warn_cpu_offline)
|
||||
{
|
||||
struct llist_head *head;
|
||||
struct llist_node *entry;
|
||||
struct call_single_data *csd, *csd_next;
|
||||
static bool warned;
|
||||
|
||||
entry = llist_del_all(&__get_cpu_var(call_single_queue));
|
||||
WARN_ON(!irqs_disabled());
|
||||
|
||||
head = &__get_cpu_var(call_single_queue);
|
||||
entry = llist_del_all(head);
|
||||
entry = llist_reverse_order(entry);
|
||||
|
||||
/*
|
||||
* Shouldn't receive this interrupt on a cpu that is not yet online.
|
||||
*/
|
||||
if (unlikely(!cpu_online(smp_processor_id()) && !warned)) {
|
||||
/* There shouldn't be any pending callbacks on an offline CPU. */
|
||||
if (unlikely(warn_cpu_offline && !cpu_online(smp_processor_id()) &&
|
||||
!warned && !llist_empty(head))) {
|
||||
warned = true;
|
||||
WARN(1, "IPI on offline CPU %d\n", smp_processor_id());
|
||||
|
||||
|
@@ -136,7 +136,6 @@ static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
|
||||
/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
|
||||
static int maxolduid = 65535;
|
||||
static int minolduid;
|
||||
static int min_percpu_pagelist_fract = 8;
|
||||
|
||||
static int ngroups_max = NGROUPS_MAX;
|
||||
static const int cap_last_cap = CAP_LAST_CAP;
|
||||
@@ -152,10 +151,6 @@ static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
|
||||
#ifdef CONFIG_SPARC
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SPARC64
|
||||
extern int sysctl_tsb_ratio;
|
||||
#endif
|
||||
|
||||
#ifdef __hppa__
|
||||
extern int pwrsw_enabled;
|
||||
#endif
|
||||
@@ -865,6 +860,17 @@ static struct ctl_table kern_table[] = {
|
||||
.extra1 = &zero,
|
||||
.extra2 = &one,
|
||||
},
|
||||
#ifdef CONFIG_SMP
|
||||
{
|
||||
.procname = "softlockup_all_cpu_backtrace",
|
||||
.data = &sysctl_softlockup_all_cpu_backtrace,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.extra1 = &zero,
|
||||
.extra2 = &one,
|
||||
},
|
||||
#endif /* CONFIG_SMP */
|
||||
{
|
||||
.procname = "nmi_watchdog",
|
||||
.data = &watchdog_user_enabled,
|
||||
@@ -1321,7 +1327,7 @@ static struct ctl_table vm_table[] = {
|
||||
.maxlen = sizeof(percpu_pagelist_fraction),
|
||||
.mode = 0644,
|
||||
.proc_handler = percpu_pagelist_fraction_sysctl_handler,
|
||||
.extra1 = &min_percpu_pagelist_fract,
|
||||
.extra1 = &zero,
|
||||
},
|
||||
#ifdef CONFIG_MMU
|
||||
{
|
||||
|
@@ -1396,7 +1396,6 @@ void tracing_start(void)
|
||||
|
||||
arch_spin_unlock(&global_trace.max_lock);
|
||||
|
||||
ftrace_start();
|
||||
out:
|
||||
raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
|
||||
}
|
||||
@@ -1443,7 +1442,6 @@ void tracing_stop(void)
|
||||
struct ring_buffer *buffer;
|
||||
unsigned long flags;
|
||||
|
||||
ftrace_stop();
|
||||
raw_spin_lock_irqsave(&global_trace.start_lock, flags);
|
||||
if (global_trace.stop_count++)
|
||||
goto out;
|
||||
|
@@ -893,6 +893,9 @@ probe_event_enable(struct trace_uprobe *tu, struct ftrace_event_file *file,
|
||||
int ret;
|
||||
|
||||
if (file) {
|
||||
if (tu->tp.flags & TP_FLAG_PROFILE)
|
||||
return -EINTR;
|
||||
|
||||
link = kmalloc(sizeof(*link), GFP_KERNEL);
|
||||
if (!link)
|
||||
return -ENOMEM;
|
||||
@@ -901,29 +904,40 @@ probe_event_enable(struct trace_uprobe *tu, struct ftrace_event_file *file,
|
||||
list_add_tail_rcu(&link->list, &tu->tp.files);
|
||||
|
||||
tu->tp.flags |= TP_FLAG_TRACE;
|
||||
} else
|
||||
tu->tp.flags |= TP_FLAG_PROFILE;
|
||||
} else {
|
||||
if (tu->tp.flags & TP_FLAG_TRACE)
|
||||
return -EINTR;
|
||||
|
||||
ret = uprobe_buffer_enable();
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
tu->tp.flags |= TP_FLAG_PROFILE;
|
||||
}
|
||||
|
||||
WARN_ON(!uprobe_filter_is_empty(&tu->filter));
|
||||
|
||||
if (enabled)
|
||||
return 0;
|
||||
|
||||
ret = uprobe_buffer_enable();
|
||||
if (ret)
|
||||
goto err_flags;
|
||||
|
||||
tu->consumer.filter = filter;
|
||||
ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
|
||||
if (ret) {
|
||||
if (file) {
|
||||
list_del(&link->list);
|
||||
kfree(link);
|
||||
tu->tp.flags &= ~TP_FLAG_TRACE;
|
||||
} else
|
||||
tu->tp.flags &= ~TP_FLAG_PROFILE;
|
||||
}
|
||||
if (ret)
|
||||
goto err_buffer;
|
||||
|
||||
return 0;
|
||||
|
||||
err_buffer:
|
||||
uprobe_buffer_disable();
|
||||
|
||||
err_flags:
|
||||
if (file) {
|
||||
list_del(&link->list);
|
||||
kfree(link);
|
||||
tu->tp.flags &= ~TP_FLAG_TRACE;
|
||||
} else {
|
||||
tu->tp.flags &= ~TP_FLAG_PROFILE;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -1201,12 +1215,6 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
|
||||
|
||||
current->utask->vaddr = (unsigned long) &udd;
|
||||
|
||||
#ifdef CONFIG_PERF_EVENTS
|
||||
if ((tu->tp.flags & TP_FLAG_TRACE) == 0 &&
|
||||
!uprobe_perf_filter(&tu->consumer, 0, current->mm))
|
||||
return UPROBE_HANDLER_REMOVE;
|
||||
#endif
|
||||
|
||||
if (WARN_ON_ONCE(!uprobe_cpu_buffer))
|
||||
return 0;
|
||||
|
||||
|
@@ -492,33 +492,29 @@ static int sys_tracepoint_refcount;
|
||||
|
||||
void syscall_regfunc(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct task_struct *g, *t;
|
||||
struct task_struct *p, *t;
|
||||
|
||||
if (!sys_tracepoint_refcount) {
|
||||
read_lock_irqsave(&tasklist_lock, flags);
|
||||
do_each_thread(g, t) {
|
||||
/* Skip kernel threads. */
|
||||
if (t->mm)
|
||||
set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
|
||||
} while_each_thread(g, t);
|
||||
read_unlock_irqrestore(&tasklist_lock, flags);
|
||||
read_lock(&tasklist_lock);
|
||||
for_each_process_thread(p, t) {
|
||||
set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
|
||||
}
|
||||
read_unlock(&tasklist_lock);
|
||||
}
|
||||
sys_tracepoint_refcount++;
|
||||
}
|
||||
|
||||
void syscall_unregfunc(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct task_struct *g, *t;
|
||||
struct task_struct *p, *t;
|
||||
|
||||
sys_tracepoint_refcount--;
|
||||
if (!sys_tracepoint_refcount) {
|
||||
read_lock_irqsave(&tasklist_lock, flags);
|
||||
do_each_thread(g, t) {
|
||||
read_lock(&tasklist_lock);
|
||||
for_each_process_thread(p, t) {
|
||||
clear_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
|
||||
} while_each_thread(g, t);
|
||||
read_unlock_irqrestore(&tasklist_lock, flags);
|
||||
}
|
||||
read_unlock(&tasklist_lock);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@@ -31,6 +31,12 @@
|
||||
|
||||
int watchdog_user_enabled = 1;
|
||||
int __read_mostly watchdog_thresh = 10;
|
||||
#ifdef CONFIG_SMP
|
||||
int __read_mostly sysctl_softlockup_all_cpu_backtrace;
|
||||
#else
|
||||
#define sysctl_softlockup_all_cpu_backtrace 0
|
||||
#endif
|
||||
|
||||
static int __read_mostly watchdog_running;
|
||||
static u64 __read_mostly sample_period;
|
||||
|
||||
@@ -47,6 +53,7 @@ static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
|
||||
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
|
||||
static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
|
||||
#endif
|
||||
static unsigned long soft_lockup_nmi_warn;
|
||||
|
||||
/* boot commands */
|
||||
/*
|
||||
@@ -95,6 +102,15 @@ static int __init nosoftlockup_setup(char *str)
|
||||
}
|
||||
__setup("nosoftlockup", nosoftlockup_setup);
|
||||
/* */
|
||||
#ifdef CONFIG_SMP
|
||||
static int __init softlockup_all_cpu_backtrace_setup(char *str)
|
||||
{
|
||||
sysctl_softlockup_all_cpu_backtrace =
|
||||
!!simple_strtol(str, NULL, 0);
|
||||
return 1;
|
||||
}
|
||||
__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Hard-lockup warnings should be triggered after just a few seconds. Soft-
|
||||
@@ -271,6 +287,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
|
||||
unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
|
||||
struct pt_regs *regs = get_irq_regs();
|
||||
int duration;
|
||||
int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;
|
||||
|
||||
/* kick the hardlockup detector */
|
||||
watchdog_interrupt_count();
|
||||
@@ -317,6 +334,17 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
|
||||
if (__this_cpu_read(soft_watchdog_warn) == true)
|
||||
return HRTIMER_RESTART;
|
||||
|
||||
if (softlockup_all_cpu_backtrace) {
|
||||
/* Prevent multiple soft-lockup reports if one cpu is already
|
||||
* engaged in dumping cpu back traces
|
||||
*/
|
||||
if (test_and_set_bit(0, &soft_lockup_nmi_warn)) {
|
||||
/* Someone else will report us. Let's give up */
|
||||
__this_cpu_write(soft_watchdog_warn, true);
|
||||
return HRTIMER_RESTART;
|
||||
}
|
||||
}
|
||||
|
||||
printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
|
||||
smp_processor_id(), duration,
|
||||
current->comm, task_pid_nr(current));
|
||||
@@ -327,6 +355,17 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
|
||||
else
|
||||
dump_stack();
|
||||
|
||||
if (softlockup_all_cpu_backtrace) {
|
||||
/* Avoid generating two back traces for current
|
||||
* given that one is already made above
|
||||
*/
|
||||
trigger_allbutself_cpu_backtrace();
|
||||
|
||||
clear_bit(0, &soft_lockup_nmi_warn);
|
||||
/* Barrier to sync with other cpus */
|
||||
smp_mb__after_atomic();
|
||||
}
|
||||
|
||||
if (softlockup_panic)
|
||||
panic("softlockup: hung tasks");
|
||||
__this_cpu_write(soft_watchdog_warn, true);
|
||||
@@ -527,10 +566,8 @@ static void update_timers_all_cpus(void)
|
||||
int cpu;
|
||||
|
||||
get_online_cpus();
|
||||
preempt_disable();
|
||||
for_each_online_cpu(cpu)
|
||||
update_timers(cpu);
|
||||
preempt_enable();
|
||||
put_online_cpus();
|
||||
}
|
||||
|
||||
|
@@ -3284,6 +3284,7 @@ int workqueue_sysfs_register(struct workqueue_struct *wq)
|
||||
}
|
||||
}
|
||||
|
||||
dev_set_uevent_suppress(&wq_dev->dev, false);
|
||||
kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD);
|
||||
return 0;
|
||||
}
|
||||
@@ -4879,7 +4880,7 @@ static void __init wq_numa_init(void)
|
||||
BUG_ON(!tbl);
|
||||
|
||||
for_each_node(node)
|
||||
BUG_ON(!alloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
|
||||
BUG_ON(!zalloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
|
||||
node_online(node) ? node : NUMA_NO_NODE));
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
|
Reference in New Issue
Block a user