Merge branch 'locking/urgent' into locking/core, before applying larger changes and to refresh the branch with fixes

Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar
2014-07-17 11:45:29 +02:00
1129 changed files with 13884 additions and 8226 deletions

View File

@@ -220,9 +220,16 @@ config INLINE_WRITE_UNLOCK_IRQRESTORE
endif
config ARCH_SUPPORTS_ATOMIC_RMW
bool
config MUTEX_SPIN_ON_OWNER
def_bool y
depends on SMP && !DEBUG_MUTEXES
depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW
config RWSEM_SPIN_ON_OWNER
def_bool y
depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW
config ARCH_USE_QUEUE_RWLOCK
bool

View File

@@ -1648,10 +1648,13 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
int flags, const char *unused_dev_name,
void *data)
{
struct super_block *pinned_sb = NULL;
struct cgroup_subsys *ss;
struct cgroup_root *root;
struct cgroup_sb_opts opts;
struct dentry *dentry;
int ret;
int i;
bool new_sb;
/*
@@ -1677,6 +1680,27 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
goto out_unlock;
}
/*
* Destruction of cgroup root is asynchronous, so subsystems may
* still be dying after the previous unmount. Let's drain the
* dying subsystems. We just need to ensure that the ones
* unmounted previously finish dying and don't care about new ones
* starting. Testing ref liveliness is good enough.
*/
for_each_subsys(ss, i) {
if (!(opts.subsys_mask & (1 << i)) ||
ss->root == &cgrp_dfl_root)
continue;
if (!percpu_ref_tryget_live(&ss->root->cgrp.self.refcnt)) {
mutex_unlock(&cgroup_mutex);
msleep(10);
ret = restart_syscall();
goto out_free;
}
cgroup_put(&ss->root->cgrp);
}
for_each_root(root) {
bool name_match = false;
@@ -1717,15 +1741,23 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
}
/*
* A root's lifetime is governed by its root cgroup.
* tryget_live failure indicate that the root is being
* destroyed. Wait for destruction to complete so that the
* subsystems are free. We can use wait_queue for the wait
* but this path is super cold. Let's just sleep for a bit
* and retry.
* We want to reuse @root whose lifetime is governed by its
* ->cgrp. Let's check whether @root is alive and keep it
* that way. As cgroup_kill_sb() can happen anytime, we
* want to block it by pinning the sb so that @root doesn't
* get killed before mount is complete.
*
* With the sb pinned, tryget_live can reliably indicate
* whether @root can be reused. If it's being killed,
* drain it. We can use wait_queue for the wait but this
* path is super cold. Let's just sleep a bit and retry.
*/
if (!percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
pinned_sb = kernfs_pin_sb(root->kf_root, NULL);
if (IS_ERR(pinned_sb) ||
!percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
mutex_unlock(&cgroup_mutex);
if (!IS_ERR_OR_NULL(pinned_sb))
deactivate_super(pinned_sb);
msleep(10);
ret = restart_syscall();
goto out_free;
@@ -1770,6 +1802,16 @@ out_free:
CGROUP_SUPER_MAGIC, &new_sb);
if (IS_ERR(dentry) || !new_sb)
cgroup_put(&root->cgrp);
/*
* If @pinned_sb, we're reusing an existing root and holding an
* extra ref on its sb. Mount is complete. Put the extra ref.
*/
if (pinned_sb) {
WARN_ON(new_sb);
deactivate_super(pinned_sb);
}
return dentry;
}
@@ -3328,7 +3370,7 @@ bool css_has_online_children(struct cgroup_subsys_state *css)
rcu_read_lock();
css_for_each_child(child, css) {
if (css->flags & CSS_ONLINE) {
if (child->flags & CSS_ONLINE) {
ret = true;
break;
}

View File

@@ -19,6 +19,7 @@
#include <linux/sched.h>
#include <linux/hardirq.h>
#include <linux/export.h>
#include <linux/kprobes.h>
#define CREATE_TRACE_POINTS
#include <trace/events/context_tracking.h>
@@ -104,6 +105,7 @@ void context_tracking_user_enter(void)
}
local_irq_restore(flags);
}
NOKPROBE_SYMBOL(context_tracking_user_enter);
#ifdef CONFIG_PREEMPT
/**
@@ -181,6 +183,7 @@ void context_tracking_user_exit(void)
}
local_irq_restore(flags);
}
NOKPROBE_SYMBOL(context_tracking_user_exit);
/**
* __context_tracking_task_switch - context switch the syscall callbacks

View File

@@ -1181,7 +1181,13 @@ done:
int current_cpuset_is_being_rebound(void)
{
return task_cs(current) == cpuset_being_rebound;
int ret;
rcu_read_lock();
ret = task_cs(current) == cpuset_being_rebound;
rcu_read_unlock();
return ret;
}
static int update_relax_domain_level(struct cpuset *cs, s64 val)
@@ -1617,7 +1623,17 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
* resources, wait for the previously scheduled operations before
* proceeding, so that we don't end up keep removing tasks added
* after execution capability is restored.
*
* cpuset_hotplug_work calls back into cgroup core via
* cgroup_transfer_tasks() and waiting for it from a cgroupfs
* operation like this one can lead to a deadlock through kernfs
* active_ref protection. Let's break the protection. Losing the
* protection is okay as we check whether @cs is online after
* grabbing cpuset_mutex anyway. This only happens on the legacy
* hierarchies.
*/
css_get(&cs->css);
kernfs_break_active_protection(of->kn);
flush_work(&cpuset_hotplug_work);
mutex_lock(&cpuset_mutex);
@@ -1645,6 +1661,8 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
free_trial_cpuset(trialcs);
out_unlock:
mutex_unlock(&cpuset_mutex);
kernfs_unbreak_active_protection(of->kn);
css_put(&cs->css);
return retval ?: nbytes;
}

View File

@@ -40,6 +40,7 @@
#include <linux/mm_types.h>
#include <linux/cgroup.h>
#include <linux/module.h>
#include <linux/mman.h>
#include "internal.h"
@@ -5128,6 +5129,7 @@ struct perf_mmap_event {
int maj, min;
u64 ino;
u64 ino_generation;
u32 prot, flags;
struct {
struct perf_event_header header;
@@ -5169,6 +5171,8 @@ static void perf_event_mmap_output(struct perf_event *event,
mmap_event->event_id.header.size += sizeof(mmap_event->min);
mmap_event->event_id.header.size += sizeof(mmap_event->ino);
mmap_event->event_id.header.size += sizeof(mmap_event->ino_generation);
mmap_event->event_id.header.size += sizeof(mmap_event->prot);
mmap_event->event_id.header.size += sizeof(mmap_event->flags);
}
perf_event_header__init_id(&mmap_event->event_id.header, &sample, event);
@@ -5187,6 +5191,8 @@ static void perf_event_mmap_output(struct perf_event *event,
perf_output_put(&handle, mmap_event->min);
perf_output_put(&handle, mmap_event->ino);
perf_output_put(&handle, mmap_event->ino_generation);
perf_output_put(&handle, mmap_event->prot);
perf_output_put(&handle, mmap_event->flags);
}
__output_copy(&handle, mmap_event->file_name,
@@ -5205,6 +5211,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
struct file *file = vma->vm_file;
int maj = 0, min = 0;
u64 ino = 0, gen = 0;
u32 prot = 0, flags = 0;
unsigned int size;
char tmp[16];
char *buf = NULL;
@@ -5235,6 +5242,28 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
gen = inode->i_generation;
maj = MAJOR(dev);
min = MINOR(dev);
if (vma->vm_flags & VM_READ)
prot |= PROT_READ;
if (vma->vm_flags & VM_WRITE)
prot |= PROT_WRITE;
if (vma->vm_flags & VM_EXEC)
prot |= PROT_EXEC;
if (vma->vm_flags & VM_MAYSHARE)
flags = MAP_SHARED;
else
flags = MAP_PRIVATE;
if (vma->vm_flags & VM_DENYWRITE)
flags |= MAP_DENYWRITE;
if (vma->vm_flags & VM_MAYEXEC)
flags |= MAP_EXECUTABLE;
if (vma->vm_flags & VM_LOCKED)
flags |= MAP_LOCKED;
if (vma->vm_flags & VM_HUGETLB)
flags |= MAP_HUGETLB;
goto got_name;
} else {
name = (char *)arch_vma_name(vma);
@@ -5275,6 +5304,8 @@ got_name:
mmap_event->min = min;
mmap_event->ino = ino;
mmap_event->ino_generation = gen;
mmap_event->prot = prot;
mmap_event->flags = flags;
if (!(vma->vm_flags & VM_EXEC))
mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_DATA;
@@ -5315,6 +5346,8 @@ void perf_event_mmap(struct vm_area_struct *vma)
/* .min (attr_mmap2 only) */
/* .ino (attr_mmap2 only) */
/* .ino_generation (attr_mmap2 only) */
/* .prot (attr_mmap2 only) */
/* .flags (attr_mmap2 only) */
};
perf_event_mmap_event(&mmap_event);
@@ -6897,10 +6930,6 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
if (ret)
return -EFAULT;
/* disabled for now */
if (attr->mmap2)
return -EINVAL;
if (attr->__reserved_1)
return -EINVAL;

View File

@@ -846,7 +846,7 @@ static void __uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *u
{
int err;
if (!consumer_del(uprobe, uc)) /* WARN? */
if (WARN_ON(!consumer_del(uprobe, uc)))
return;
err = register_for_each_vma(uprobe, NULL);
@@ -927,7 +927,7 @@ int uprobe_apply(struct inode *inode, loff_t offset,
int ret = -ENOENT;
uprobe = find_uprobe(inode, offset);
if (!uprobe)
if (WARN_ON(!uprobe))
return ret;
down_write(&uprobe->register_rwsem);
@@ -952,7 +952,7 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume
struct uprobe *uprobe;
uprobe = find_uprobe(inode, offset);
if (!uprobe)
if (WARN_ON(!uprobe))
return;
down_write(&uprobe->register_rwsem);

View File

@@ -1487,7 +1487,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
total_forks++;
spin_unlock(&current->sighand->siglock);
syscall_tracepoint_update(p);
write_unlock_irq(&tasklist_lock);
proc_fork_connector(p);
cgroup_post_fork(p);
if (clone_flags & CLONE_THREAD)

View File

@@ -455,9 +455,9 @@ EXPORT_SYMBOL_GPL(irq_alloc_hwirqs);
*/
void irq_free_hwirqs(unsigned int from, int cnt)
{
int i;
int i, j;
for (i = from; cnt > 0; i++, cnt--) {
for (i = from, j = cnt; j > 0; i++, j--) {
irq_set_status_flags(i, _IRQ_NOREQUEST | _IRQ_NOPROBE);
arch_teardown_hwirq(i);
}

View File

@@ -1617,6 +1617,7 @@ static int __init crash_save_vmcoreinfo_init(void)
#ifdef CONFIG_MEMORY_FAILURE
VMCOREINFO_NUMBER(PG_hwpoison);
#endif
VMCOREINFO_NUMBER(PG_head_mask);
VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
arch_crash_save_vmcoreinfo();

View File

@@ -14,21 +14,47 @@
* called from interrupt context and we have preemption disabled while
* spinning.
*/
static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_queue, osq_node);
static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_node, osq_node);
/*
* We use the value 0 to represent "no CPU", thus the encoded value
* will be the CPU number incremented by 1.
*/
static inline int encode_cpu(int cpu_nr)
{
return cpu_nr + 1;
}
static inline struct optimistic_spin_node *decode_cpu(int encoded_cpu_val)
{
int cpu_nr = encoded_cpu_val - 1;
return per_cpu_ptr(&osq_node, cpu_nr);
}
/*
* Get a stable @node->next pointer, either for unlock() or unqueue() purposes.
* Can return NULL in case we were the last queued and we updated @lock instead.
*/
static inline struct optimistic_spin_queue *
osq_wait_next(struct optimistic_spin_queue **lock,
struct optimistic_spin_queue *node,
struct optimistic_spin_queue *prev)
static inline struct optimistic_spin_node *
osq_wait_next(struct optimistic_spin_queue *lock,
struct optimistic_spin_node *node,
struct optimistic_spin_node *prev)
{
struct optimistic_spin_queue *next = NULL;
struct optimistic_spin_node *next = NULL;
int curr = encode_cpu(smp_processor_id());
int old;
/*
* If there is a prev node in queue, then the 'old' value will be
* the prev node's CPU #, else it's set to OSQ_UNLOCKED_VAL since if
* we're currently last in queue, then the queue will then become empty.
*/
old = prev ? prev->cpu : OSQ_UNLOCKED_VAL;
for (;;) {
if (*lock == node && cmpxchg(lock, node, prev) == node) {
if (atomic_read(&lock->tail) == curr &&
atomic_cmpxchg(&lock->tail, curr, old) == curr) {
/*
* We were the last queued, we moved @lock back. @prev
* will now observe @lock and will complete its
@@ -59,18 +85,23 @@ osq_wait_next(struct optimistic_spin_queue **lock,
return next;
}
bool osq_lock(struct optimistic_spin_queue **lock)
bool osq_lock(struct optimistic_spin_queue *lock)
{
struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node);
struct optimistic_spin_queue *prev, *next;
struct optimistic_spin_node *node = this_cpu_ptr(&osq_node);
struct optimistic_spin_node *prev, *next;
int curr = encode_cpu(smp_processor_id());
int old;
node->locked = 0;
node->next = NULL;
node->cpu = curr;
node->prev = prev = xchg(lock, node);
if (likely(prev == NULL))
old = atomic_xchg(&lock->tail, curr);
if (old == OSQ_UNLOCKED_VAL)
return true;
prev = decode_cpu(old);
node->prev = prev;
ACCESS_ONCE(prev->next) = node;
/*
@@ -149,20 +180,21 @@ unqueue:
return false;
}
void osq_unlock(struct optimistic_spin_queue **lock)
void osq_unlock(struct optimistic_spin_queue *lock)
{
struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node);
struct optimistic_spin_queue *next;
struct optimistic_spin_node *node, *next;
int curr = encode_cpu(smp_processor_id());
/*
* Fast path for the uncontended case.
*/
if (likely(cmpxchg(lock, node, NULL) == node))
if (likely(atomic_cmpxchg(&lock->tail, curr, OSQ_UNLOCKED_VAL) == curr))
return;
/*
* Second most likely case.
*/
node = this_cpu_ptr(&osq_node);
next = xchg(&node->next, NULL);
if (next) {
ACCESS_ONCE(next->locked) = 1;

View File

@@ -118,12 +118,13 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
* mutex_lock()/rwsem_down_{read,write}() etc.
*/
struct optimistic_spin_queue {
struct optimistic_spin_queue *next, *prev;
struct optimistic_spin_node {
struct optimistic_spin_node *next, *prev;
int locked; /* 1 if lock acquired */
int cpu; /* encoded CPU # value */
};
extern bool osq_lock(struct optimistic_spin_queue **lock);
extern void osq_unlock(struct optimistic_spin_queue **lock);
extern bool osq_lock(struct optimistic_spin_queue *lock);
extern void osq_unlock(struct optimistic_spin_queue *lock);
#endif /* __LINUX_MCS_SPINLOCK_H */

View File

@@ -54,7 +54,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
INIT_LIST_HEAD(&lock->wait_list);
mutex_clear_owner(lock);
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
lock->osq = NULL;
osq_lock_init(&lock->osq);
#endif
debug_mutex_init(lock, name, key);

View File

@@ -26,7 +26,7 @@ int rwsem_is_locked(struct rw_semaphore *sem)
unsigned long flags;
if (raw_spin_trylock_irqsave(&sem->wait_lock, flags)) {
ret = (sem->activity != 0);
ret = (sem->count != 0);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
}
return ret;
@@ -46,7 +46,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name,
debug_check_no_locks_freed((void *)sem, sizeof(*sem));
lockdep_init_map(&sem->dep_map, name, key, 0);
#endif
sem->activity = 0;
sem->count = 0;
raw_spin_lock_init(&sem->wait_lock);
INIT_LIST_HEAD(&sem->wait_list);
}
@@ -95,7 +95,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
waiter = list_entry(next, struct rwsem_waiter, list);
} while (waiter->type != RWSEM_WAITING_FOR_WRITE);
sem->activity += woken;
sem->count += woken;
out:
return sem;
@@ -126,9 +126,9 @@ void __sched __down_read(struct rw_semaphore *sem)
raw_spin_lock_irqsave(&sem->wait_lock, flags);
if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
if (sem->count >= 0 && list_empty(&sem->wait_list)) {
/* granted */
sem->activity++;
sem->count++;
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
goto out;
}
@@ -170,9 +170,9 @@ int __down_read_trylock(struct rw_semaphore *sem)
raw_spin_lock_irqsave(&sem->wait_lock, flags);
if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
if (sem->count >= 0 && list_empty(&sem->wait_list)) {
/* granted */
sem->activity++;
sem->count++;
ret = 1;
}
@@ -206,7 +206,7 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
* itself into sleep and waiting for system woke it or someone
* else in the head of the wait list up.
*/
if (sem->activity == 0)
if (sem->count == 0)
break;
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
@@ -214,7 +214,7 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
raw_spin_lock_irqsave(&sem->wait_lock, flags);
}
/* got the lock */
sem->activity = -1;
sem->count = -1;
list_del(&waiter.list);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
@@ -235,9 +235,9 @@ int __down_write_trylock(struct rw_semaphore *sem)
raw_spin_lock_irqsave(&sem->wait_lock, flags);
if (sem->activity == 0) {
if (sem->count == 0) {
/* got the lock */
sem->activity = -1;
sem->count = -1;
ret = 1;
}
@@ -255,7 +255,7 @@ void __up_read(struct rw_semaphore *sem)
raw_spin_lock_irqsave(&sem->wait_lock, flags);
if (--sem->activity == 0 && !list_empty(&sem->wait_list))
if (--sem->count == 0 && !list_empty(&sem->wait_list))
sem = __rwsem_wake_one_writer(sem);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
@@ -270,7 +270,7 @@ void __up_write(struct rw_semaphore *sem)
raw_spin_lock_irqsave(&sem->wait_lock, flags);
sem->activity = 0;
sem->count = 0;
if (!list_empty(&sem->wait_list))
sem = __rwsem_do_wake(sem, 1);
@@ -287,7 +287,7 @@ void __downgrade_write(struct rw_semaphore *sem)
raw_spin_lock_irqsave(&sem->wait_lock, flags);
sem->activity = 1;
sem->count = 1;
if (!list_empty(&sem->wait_list))
sem = __rwsem_do_wake(sem, 0);

View File

@@ -82,9 +82,9 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name,
sem->count = RWSEM_UNLOCKED_VALUE;
raw_spin_lock_init(&sem->wait_lock);
INIT_LIST_HEAD(&sem->wait_list);
#ifdef CONFIG_SMP
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
sem->owner = NULL;
sem->osq = NULL;
osq_lock_init(&sem->osq);
#endif
}
@@ -262,7 +262,7 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
return false;
}
#ifdef CONFIG_SMP
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
/*
* Try to acquire write lock before the writer has been put on wait queue.
*/
@@ -285,10 +285,10 @@ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
{
struct task_struct *owner;
bool on_cpu = true;
bool on_cpu = false;
if (need_resched())
return 0;
return false;
rcu_read_lock();
owner = ACCESS_ONCE(sem->owner);
@@ -297,9 +297,9 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
rcu_read_unlock();
/*
* If sem->owner is not set, the rwsem owner may have
* just acquired it and not set the owner yet or the rwsem
* has been released.
* If sem->owner is not set, yet we have just recently entered the
* slowpath, then there is a possibility reader(s) may have the lock.
* To be safe, avoid spinning in these situations.
*/
return on_cpu;
}

View File

@@ -12,7 +12,7 @@
#include <linux/atomic.h>
#if defined(CONFIG_SMP) && defined(CONFIG_RWSEM_XCHGADD_ALGORITHM)
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
static inline void rwsem_set_owner(struct rw_semaphore *sem)
{
sem->owner = current;

View File

@@ -35,6 +35,7 @@
static int nocompress;
static int noresume;
static int nohibernate;
static int resume_wait;
static unsigned int resume_delay;
static char resume_file[256] = CONFIG_PM_STD_PARTITION;
@@ -62,6 +63,11 @@ bool freezer_test_done;
static const struct platform_hibernation_ops *hibernation_ops;
bool hibernation_available(void)
{
return (nohibernate == 0);
}
/**
* hibernation_set_ops - Set the global hibernate operations.
* @ops: Hibernation operations to use in subsequent hibernation transitions.
@@ -642,6 +648,11 @@ int hibernate(void)
{
int error;
if (!hibernation_available()) {
pr_debug("PM: Hibernation not available.\n");
return -EPERM;
}
lock_system_sleep();
/* The snapshot device should not be opened while we're running */
if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
@@ -734,7 +745,7 @@ static int software_resume(void)
/*
* If the user said "noresume".. bail out early.
*/
if (noresume)
if (noresume || !hibernation_available())
return 0;
/*
@@ -900,6 +911,9 @@ static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr,
int i;
char *start = buf;
if (!hibernation_available())
return sprintf(buf, "[disabled]\n");
for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) {
if (!hibernation_modes[i])
continue;
@@ -934,6 +948,9 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,
char *p;
int mode = HIBERNATION_INVALID;
if (!hibernation_available())
return -EPERM;
p = memchr(buf, '\n', n);
len = p ? p - buf : n;
@@ -1101,6 +1118,10 @@ static int __init hibernate_setup(char *str)
noresume = 1;
else if (!strncmp(str, "nocompress", 10))
nocompress = 1;
else if (!strncmp(str, "no", 2)) {
noresume = 1;
nohibernate = 1;
}
return 1;
}
@@ -1125,9 +1146,23 @@ static int __init resumedelay_setup(char *str)
return 1;
}
static int __init nohibernate_setup(char *str)
{
noresume = 1;
nohibernate = 1;
return 1;
}
static int __init kaslr_nohibernate_setup(char *str)
{
return nohibernate_setup(str);
}
__setup("noresume", noresume_setup);
__setup("resume_offset=", resume_offset_setup);
__setup("resume=", resume_setup);
__setup("hibernate=", hibernate_setup);
__setup("resumewait", resumewait_setup);
__setup("resumedelay=", resumedelay_setup);
__setup("nohibernate", nohibernate_setup);
__setup("kaslr", kaslr_nohibernate_setup);

View File

@@ -300,13 +300,11 @@ static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr,
s += sprintf(s,"%s ", pm_states[i].label);
#endif
#ifdef CONFIG_HIBERNATION
s += sprintf(s, "%s\n", "disk");
#else
if (hibernation_available())
s += sprintf(s, "disk ");
if (s != buf)
/* convert the last space to a newline */
*(s-1) = '\n';
#endif
return (s - buf);
}

View File

@@ -49,6 +49,9 @@ static int snapshot_open(struct inode *inode, struct file *filp)
struct snapshot_data *data;
int error;
if (!hibernation_available())
return -EPERM;
lock_system_sleep();
if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {

View File

@@ -1416,9 +1416,10 @@ static int have_callable_console(void)
/*
* Can we actually use the console at this time on this cpu?
*
* Console drivers may assume that per-cpu resources have been allocated. So
* unless they're explicitly marked as being able to cope (CON_ANYTIME) don't
* call them until this CPU is officially up.
* Console drivers may assume that per-cpu resources have
* been allocated. So unless they're explicitly marked as
* being able to cope (CON_ANYTIME) don't call them until
* this CPU is officially up.
*/
static inline int can_use_console(unsigned int cpu)
{
@@ -1431,10 +1432,8 @@ static inline int can_use_console(unsigned int cpu)
* console_lock held, and 'console_locked' set) if it
* is successful, false otherwise.
*/
static int console_trylock_for_printk(void)
static int console_trylock_for_printk(unsigned int cpu)
{
unsigned int cpu = smp_processor_id();
if (!console_trylock())
return 0;
/*
@@ -1609,8 +1608,7 @@ asmlinkage int vprintk_emit(int facility, int level,
*/
if (!oops_in_progress && !lockdep_recursing(current)) {
recursion_bug = 1;
local_irq_restore(flags);
return 0;
goto out_restore_irqs;
}
zap_locks();
}
@@ -1718,27 +1716,21 @@ asmlinkage int vprintk_emit(int facility, int level,
logbuf_cpu = UINT_MAX;
raw_spin_unlock(&logbuf_lock);
lockdep_on();
local_irq_restore(flags);
/* If called from the scheduler, we can not call up(). */
if (in_sched)
return printed_len;
/*
* Disable preemption to avoid being preempted while holding
* console_sem which would prevent anyone from printing to console
*/
preempt_disable();
/*
* Try to acquire and then immediately release the console semaphore.
* The release will print out buffers and wake up /dev/kmsg and syslog()
* users.
*/
if (console_trylock_for_printk())
console_unlock();
preempt_enable();
if (!in_sched) {
/*
* Try to acquire and then immediately release the console
* semaphore. The release will print out buffers and wake up
* /dev/kmsg and syslog() users.
*/
if (console_trylock_for_printk(this_cpu))
console_unlock();
}
lockdep_on();
out_restore_irqs:
local_irq_restore(flags);
return printed_len;
}
EXPORT_SYMBOL(vprintk_emit);

View File

@@ -29,6 +29,8 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_function_data, cfd_data);
static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue);
static void flush_smp_call_function_queue(bool warn_cpu_offline);
static int
hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
@@ -51,12 +53,27 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
#ifdef CONFIG_HOTPLUG_CPU
case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN:
/* Fall-through to the CPU_DEAD[_FROZEN] case. */
case CPU_DEAD:
case CPU_DEAD_FROZEN:
free_cpumask_var(cfd->cpumask);
free_percpu(cfd->csd);
break;
case CPU_DYING:
case CPU_DYING_FROZEN:
/*
* The IPIs for the smp-call-function callbacks queued by other
* CPUs might arrive late, either due to hardware latencies or
* because this CPU disabled interrupts (inside stop-machine)
* before the IPIs were sent. So flush out any pending callbacks
* explicitly (without waiting for the IPIs to arrive), to
* ensure that the outgoing CPU doesn't go offline with work
* still pending.
*/
flush_smp_call_function_queue(false);
break;
#endif
};
@@ -177,23 +194,47 @@ static int generic_exec_single(int cpu, struct call_single_data *csd,
return 0;
}
/*
* Invoked by arch to handle an IPI for call function single. Must be
* called from the arch with interrupts disabled.
/**
* generic_smp_call_function_single_interrupt - Execute SMP IPI callbacks
*
* Invoked by arch to handle an IPI for call function single.
* Must be called with interrupts disabled.
*/
void generic_smp_call_function_single_interrupt(void)
{
flush_smp_call_function_queue(true);
}
/**
* flush_smp_call_function_queue - Flush pending smp-call-function callbacks
*
* @warn_cpu_offline: If set to 'true', warn if callbacks were queued on an
* offline CPU. Skip this check if set to 'false'.
*
* Flush any pending smp-call-function callbacks queued on this CPU. This is
* invoked by the generic IPI handler, as well as by a CPU about to go offline,
* to ensure that all pending IPI callbacks are run before it goes completely
* offline.
*
* Loop through the call_single_queue and run all the queued callbacks.
* Must be called with interrupts disabled.
*/
static void flush_smp_call_function_queue(bool warn_cpu_offline)
{
struct llist_head *head;
struct llist_node *entry;
struct call_single_data *csd, *csd_next;
static bool warned;
entry = llist_del_all(&__get_cpu_var(call_single_queue));
WARN_ON(!irqs_disabled());
head = &__get_cpu_var(call_single_queue);
entry = llist_del_all(head);
entry = llist_reverse_order(entry);
/*
* Shouldn't receive this interrupt on a cpu that is not yet online.
*/
if (unlikely(!cpu_online(smp_processor_id()) && !warned)) {
/* There shouldn't be any pending callbacks on an offline CPU. */
if (unlikely(warn_cpu_offline && !cpu_online(smp_processor_id()) &&
!warned && !llist_empty(head))) {
warned = true;
WARN(1, "IPI on offline CPU %d\n", smp_processor_id());

View File

@@ -136,7 +136,6 @@ static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
static int maxolduid = 65535;
static int minolduid;
static int min_percpu_pagelist_fract = 8;
static int ngroups_max = NGROUPS_MAX;
static const int cap_last_cap = CAP_LAST_CAP;
@@ -152,10 +151,6 @@ static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
#ifdef CONFIG_SPARC
#endif
#ifdef CONFIG_SPARC64
extern int sysctl_tsb_ratio;
#endif
#ifdef __hppa__
extern int pwrsw_enabled;
#endif
@@ -865,6 +860,17 @@ static struct ctl_table kern_table[] = {
.extra1 = &zero,
.extra2 = &one,
},
#ifdef CONFIG_SMP
{
.procname = "softlockup_all_cpu_backtrace",
.data = &sysctl_softlockup_all_cpu_backtrace,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &one,
},
#endif /* CONFIG_SMP */
{
.procname = "nmi_watchdog",
.data = &watchdog_user_enabled,
@@ -1321,7 +1327,7 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(percpu_pagelist_fraction),
.mode = 0644,
.proc_handler = percpu_pagelist_fraction_sysctl_handler,
.extra1 = &min_percpu_pagelist_fract,
.extra1 = &zero,
},
#ifdef CONFIG_MMU
{

View File

@@ -1396,7 +1396,6 @@ void tracing_start(void)
arch_spin_unlock(&global_trace.max_lock);
ftrace_start();
out:
raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
}
@@ -1443,7 +1442,6 @@ void tracing_stop(void)
struct ring_buffer *buffer;
unsigned long flags;
ftrace_stop();
raw_spin_lock_irqsave(&global_trace.start_lock, flags);
if (global_trace.stop_count++)
goto out;

View File

@@ -893,6 +893,9 @@ probe_event_enable(struct trace_uprobe *tu, struct ftrace_event_file *file,
int ret;
if (file) {
if (tu->tp.flags & TP_FLAG_PROFILE)
return -EINTR;
link = kmalloc(sizeof(*link), GFP_KERNEL);
if (!link)
return -ENOMEM;
@@ -901,29 +904,40 @@ probe_event_enable(struct trace_uprobe *tu, struct ftrace_event_file *file,
list_add_tail_rcu(&link->list, &tu->tp.files);
tu->tp.flags |= TP_FLAG_TRACE;
} else
tu->tp.flags |= TP_FLAG_PROFILE;
} else {
if (tu->tp.flags & TP_FLAG_TRACE)
return -EINTR;
ret = uprobe_buffer_enable();
if (ret < 0)
return ret;
tu->tp.flags |= TP_FLAG_PROFILE;
}
WARN_ON(!uprobe_filter_is_empty(&tu->filter));
if (enabled)
return 0;
ret = uprobe_buffer_enable();
if (ret)
goto err_flags;
tu->consumer.filter = filter;
ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
if (ret) {
if (file) {
list_del(&link->list);
kfree(link);
tu->tp.flags &= ~TP_FLAG_TRACE;
} else
tu->tp.flags &= ~TP_FLAG_PROFILE;
}
if (ret)
goto err_buffer;
return 0;
err_buffer:
uprobe_buffer_disable();
err_flags:
if (file) {
list_del(&link->list);
kfree(link);
tu->tp.flags &= ~TP_FLAG_TRACE;
} else {
tu->tp.flags &= ~TP_FLAG_PROFILE;
}
return ret;
}
@@ -1201,12 +1215,6 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
current->utask->vaddr = (unsigned long) &udd;
#ifdef CONFIG_PERF_EVENTS
if ((tu->tp.flags & TP_FLAG_TRACE) == 0 &&
!uprobe_perf_filter(&tu->consumer, 0, current->mm))
return UPROBE_HANDLER_REMOVE;
#endif
if (WARN_ON_ONCE(!uprobe_cpu_buffer))
return 0;

View File

@@ -492,33 +492,29 @@ static int sys_tracepoint_refcount;
void syscall_regfunc(void)
{
unsigned long flags;
struct task_struct *g, *t;
struct task_struct *p, *t;
if (!sys_tracepoint_refcount) {
read_lock_irqsave(&tasklist_lock, flags);
do_each_thread(g, t) {
/* Skip kernel threads. */
if (t->mm)
set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
} while_each_thread(g, t);
read_unlock_irqrestore(&tasklist_lock, flags);
read_lock(&tasklist_lock);
for_each_process_thread(p, t) {
set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
}
read_unlock(&tasklist_lock);
}
sys_tracepoint_refcount++;
}
void syscall_unregfunc(void)
{
unsigned long flags;
struct task_struct *g, *t;
struct task_struct *p, *t;
sys_tracepoint_refcount--;
if (!sys_tracepoint_refcount) {
read_lock_irqsave(&tasklist_lock, flags);
do_each_thread(g, t) {
read_lock(&tasklist_lock);
for_each_process_thread(p, t) {
clear_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
} while_each_thread(g, t);
read_unlock_irqrestore(&tasklist_lock, flags);
}
read_unlock(&tasklist_lock);
}
}
#endif

View File

@@ -31,6 +31,12 @@
int watchdog_user_enabled = 1;
int __read_mostly watchdog_thresh = 10;
#ifdef CONFIG_SMP
int __read_mostly sysctl_softlockup_all_cpu_backtrace;
#else
#define sysctl_softlockup_all_cpu_backtrace 0
#endif
static int __read_mostly watchdog_running;
static u64 __read_mostly sample_period;
@@ -47,6 +53,7 @@ static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
#endif
static unsigned long soft_lockup_nmi_warn;
/* boot commands */
/*
@@ -95,6 +102,15 @@ static int __init nosoftlockup_setup(char *str)
}
__setup("nosoftlockup", nosoftlockup_setup);
/* */
#ifdef CONFIG_SMP
static int __init softlockup_all_cpu_backtrace_setup(char *str)
{
sysctl_softlockup_all_cpu_backtrace =
!!simple_strtol(str, NULL, 0);
return 1;
}
__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
#endif
/*
* Hard-lockup warnings should be triggered after just a few seconds. Soft-
@@ -271,6 +287,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
struct pt_regs *regs = get_irq_regs();
int duration;
int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;
/* kick the hardlockup detector */
watchdog_interrupt_count();
@@ -317,6 +334,17 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
if (__this_cpu_read(soft_watchdog_warn) == true)
return HRTIMER_RESTART;
if (softlockup_all_cpu_backtrace) {
/* Prevent multiple soft-lockup reports if one cpu is already
* engaged in dumping cpu back traces
*/
if (test_and_set_bit(0, &soft_lockup_nmi_warn)) {
/* Someone else will report us. Let's give up */
__this_cpu_write(soft_watchdog_warn, true);
return HRTIMER_RESTART;
}
}
printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
smp_processor_id(), duration,
current->comm, task_pid_nr(current));
@@ -327,6 +355,17 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
else
dump_stack();
if (softlockup_all_cpu_backtrace) {
/* Avoid generating two back traces for current
* given that one is already made above
*/
trigger_allbutself_cpu_backtrace();
clear_bit(0, &soft_lockup_nmi_warn);
/* Barrier to sync with other cpus */
smp_mb__after_atomic();
}
if (softlockup_panic)
panic("softlockup: hung tasks");
__this_cpu_write(soft_watchdog_warn, true);
@@ -527,10 +566,8 @@ static void update_timers_all_cpus(void)
int cpu;
get_online_cpus();
preempt_disable();
for_each_online_cpu(cpu)
update_timers(cpu);
preempt_enable();
put_online_cpus();
}

View File

@@ -3284,6 +3284,7 @@ int workqueue_sysfs_register(struct workqueue_struct *wq)
}
}
dev_set_uevent_suppress(&wq_dev->dev, false);
kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD);
return 0;
}
@@ -4879,7 +4880,7 @@ static void __init wq_numa_init(void)
BUG_ON(!tbl);
for_each_node(node)
BUG_ON(!alloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
BUG_ON(!zalloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
node_online(node) ? node : NUMA_NO_NODE));
for_each_possible_cpu(cpu) {