Merge branch 'sched/urgent' into sched/core, to merge fixes before applying new changes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
@@ -220,9 +220,16 @@ config INLINE_WRITE_UNLOCK_IRQRESTORE
|
||||
|
||||
endif
|
||||
|
||||
config ARCH_SUPPORTS_ATOMIC_RMW
|
||||
bool
|
||||
|
||||
config MUTEX_SPIN_ON_OWNER
|
||||
def_bool y
|
||||
depends on SMP && !DEBUG_MUTEXES
|
||||
depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW
|
||||
|
||||
config RWSEM_SPIN_ON_OWNER
|
||||
def_bool y
|
||||
depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW
|
||||
|
||||
config ARCH_USE_QUEUE_RWLOCK
|
||||
bool
|
||||
|
@@ -2320,7 +2320,7 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
|
||||
next_parent = rcu_dereference(next_ctx->parent_ctx);
|
||||
|
||||
/* If neither context has a parent context, they cannot be clones. */
|
||||
if (!parent && !next_parent)
|
||||
if (!parent || !next_parent)
|
||||
goto unlock;
|
||||
|
||||
if (next_parent == ctx || next_ctx == parent || next_parent == parent) {
|
||||
@@ -7458,7 +7458,19 @@ __perf_event_exit_task(struct perf_event *child_event,
|
||||
struct perf_event_context *child_ctx,
|
||||
struct task_struct *child)
|
||||
{
|
||||
perf_remove_from_context(child_event, true);
|
||||
/*
|
||||
* Do not destroy the 'original' grouping; because of the context
|
||||
* switch optimization the original events could've ended up in a
|
||||
* random child task.
|
||||
*
|
||||
* If we were to destroy the original group, all group related
|
||||
* operations would cease to function properly after this random
|
||||
* child dies.
|
||||
*
|
||||
* Do destroy all inherited groups, we don't care about those
|
||||
* and being thorough is better.
|
||||
*/
|
||||
perf_remove_from_context(child_event, !!child_event->parent);
|
||||
|
||||
/*
|
||||
* It can happen that the parent exits first, and has events
|
||||
@@ -7474,7 +7486,7 @@ __perf_event_exit_task(struct perf_event *child_event,
|
||||
static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
|
||||
{
|
||||
struct perf_event *child_event, *next;
|
||||
struct perf_event_context *child_ctx;
|
||||
struct perf_event_context *child_ctx, *parent_ctx;
|
||||
unsigned long flags;
|
||||
|
||||
if (likely(!child->perf_event_ctxp[ctxn])) {
|
||||
@@ -7499,6 +7511,15 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
|
||||
raw_spin_lock(&child_ctx->lock);
|
||||
task_ctx_sched_out(child_ctx);
|
||||
child->perf_event_ctxp[ctxn] = NULL;
|
||||
|
||||
/*
|
||||
* In order to avoid freeing: child_ctx->parent_ctx->task
|
||||
* under perf_event_context::lock, grab another reference.
|
||||
*/
|
||||
parent_ctx = child_ctx->parent_ctx;
|
||||
if (parent_ctx)
|
||||
get_ctx(parent_ctx);
|
||||
|
||||
/*
|
||||
* If this context is a clone; unclone it so it can't get
|
||||
* swapped to another process while we're removing all
|
||||
@@ -7508,6 +7529,13 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
|
||||
update_context_time(child_ctx);
|
||||
raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
|
||||
|
||||
/*
|
||||
* Now that we no longer hold perf_event_context::lock, drop
|
||||
* our extra child_ctx->parent_ctx reference.
|
||||
*/
|
||||
if (parent_ctx)
|
||||
put_ctx(parent_ctx);
|
||||
|
||||
/*
|
||||
* Report the task dead after unscheduling the events so that we
|
||||
* won't get any samples after PERF_RECORD_EXIT. We can however still
|
||||
|
@@ -2037,19 +2037,23 @@ static int __init populate_kprobe_blacklist(unsigned long *start,
|
||||
{
|
||||
unsigned long *iter;
|
||||
struct kprobe_blacklist_entry *ent;
|
||||
unsigned long offset = 0, size = 0;
|
||||
unsigned long entry, offset = 0, size = 0;
|
||||
|
||||
for (iter = start; iter < end; iter++) {
|
||||
if (!kallsyms_lookup_size_offset(*iter, &size, &offset)) {
|
||||
pr_err("Failed to find blacklist %p\n", (void *)*iter);
|
||||
entry = arch_deref_entry_point((void *)*iter);
|
||||
|
||||
if (!kernel_text_address(entry) ||
|
||||
!kallsyms_lookup_size_offset(entry, &size, &offset)) {
|
||||
pr_err("Failed to find blacklist at %p\n",
|
||||
(void *)entry);
|
||||
continue;
|
||||
}
|
||||
|
||||
ent = kmalloc(sizeof(*ent), GFP_KERNEL);
|
||||
if (!ent)
|
||||
return -ENOMEM;
|
||||
ent->start_addr = *iter;
|
||||
ent->end_addr = *iter + size;
|
||||
ent->start_addr = entry;
|
||||
ent->end_addr = entry + size;
|
||||
INIT_LIST_HEAD(&ent->list);
|
||||
list_add_tail(&ent->list, &kprobe_blacklist);
|
||||
}
|
||||
|
@@ -14,21 +14,47 @@
|
||||
* called from interrupt context and we have preemption disabled while
|
||||
* spinning.
|
||||
*/
|
||||
static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_queue, osq_node);
|
||||
static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_node, osq_node);
|
||||
|
||||
/*
|
||||
* We use the value 0 to represent "no CPU", thus the encoded value
|
||||
* will be the CPU number incremented by 1.
|
||||
*/
|
||||
static inline int encode_cpu(int cpu_nr)
|
||||
{
|
||||
return cpu_nr + 1;
|
||||
}
|
||||
|
||||
static inline struct optimistic_spin_node *decode_cpu(int encoded_cpu_val)
|
||||
{
|
||||
int cpu_nr = encoded_cpu_val - 1;
|
||||
|
||||
return per_cpu_ptr(&osq_node, cpu_nr);
|
||||
}
|
||||
|
||||
/*
|
||||
* Get a stable @node->next pointer, either for unlock() or unqueue() purposes.
|
||||
* Can return NULL in case we were the last queued and we updated @lock instead.
|
||||
*/
|
||||
static inline struct optimistic_spin_queue *
|
||||
osq_wait_next(struct optimistic_spin_queue **lock,
|
||||
struct optimistic_spin_queue *node,
|
||||
struct optimistic_spin_queue *prev)
|
||||
static inline struct optimistic_spin_node *
|
||||
osq_wait_next(struct optimistic_spin_queue *lock,
|
||||
struct optimistic_spin_node *node,
|
||||
struct optimistic_spin_node *prev)
|
||||
{
|
||||
struct optimistic_spin_queue *next = NULL;
|
||||
struct optimistic_spin_node *next = NULL;
|
||||
int curr = encode_cpu(smp_processor_id());
|
||||
int old;
|
||||
|
||||
/*
|
||||
* If there is a prev node in queue, then the 'old' value will be
|
||||
* the prev node's CPU #, else it's set to OSQ_UNLOCKED_VAL since if
|
||||
* we're currently last in queue, then the queue will then become empty.
|
||||
*/
|
||||
old = prev ? prev->cpu : OSQ_UNLOCKED_VAL;
|
||||
|
||||
for (;;) {
|
||||
if (*lock == node && cmpxchg(lock, node, prev) == node) {
|
||||
if (atomic_read(&lock->tail) == curr &&
|
||||
atomic_cmpxchg(&lock->tail, curr, old) == curr) {
|
||||
/*
|
||||
* We were the last queued, we moved @lock back. @prev
|
||||
* will now observe @lock and will complete its
|
||||
@@ -59,18 +85,23 @@ osq_wait_next(struct optimistic_spin_queue **lock,
|
||||
return next;
|
||||
}
|
||||
|
||||
bool osq_lock(struct optimistic_spin_queue **lock)
|
||||
bool osq_lock(struct optimistic_spin_queue *lock)
|
||||
{
|
||||
struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node);
|
||||
struct optimistic_spin_queue *prev, *next;
|
||||
struct optimistic_spin_node *node = this_cpu_ptr(&osq_node);
|
||||
struct optimistic_spin_node *prev, *next;
|
||||
int curr = encode_cpu(smp_processor_id());
|
||||
int old;
|
||||
|
||||
node->locked = 0;
|
||||
node->next = NULL;
|
||||
node->cpu = curr;
|
||||
|
||||
node->prev = prev = xchg(lock, node);
|
||||
if (likely(prev == NULL))
|
||||
old = atomic_xchg(&lock->tail, curr);
|
||||
if (old == OSQ_UNLOCKED_VAL)
|
||||
return true;
|
||||
|
||||
prev = decode_cpu(old);
|
||||
node->prev = prev;
|
||||
ACCESS_ONCE(prev->next) = node;
|
||||
|
||||
/*
|
||||
@@ -149,20 +180,21 @@ unqueue:
|
||||
return false;
|
||||
}
|
||||
|
||||
void osq_unlock(struct optimistic_spin_queue **lock)
|
||||
void osq_unlock(struct optimistic_spin_queue *lock)
|
||||
{
|
||||
struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node);
|
||||
struct optimistic_spin_queue *next;
|
||||
struct optimistic_spin_node *node, *next;
|
||||
int curr = encode_cpu(smp_processor_id());
|
||||
|
||||
/*
|
||||
* Fast path for the uncontended case.
|
||||
*/
|
||||
if (likely(cmpxchg(lock, node, NULL) == node))
|
||||
if (likely(atomic_cmpxchg(&lock->tail, curr, OSQ_UNLOCKED_VAL) == curr))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Second most likely case.
|
||||
*/
|
||||
node = this_cpu_ptr(&osq_node);
|
||||
next = xchg(&node->next, NULL);
|
||||
if (next) {
|
||||
ACCESS_ONCE(next->locked) = 1;
|
||||
|
@@ -118,12 +118,13 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
|
||||
* mutex_lock()/rwsem_down_{read,write}() etc.
|
||||
*/
|
||||
|
||||
struct optimistic_spin_queue {
|
||||
struct optimistic_spin_queue *next, *prev;
|
||||
struct optimistic_spin_node {
|
||||
struct optimistic_spin_node *next, *prev;
|
||||
int locked; /* 1 if lock acquired */
|
||||
int cpu; /* encoded CPU # value */
|
||||
};
|
||||
|
||||
extern bool osq_lock(struct optimistic_spin_queue **lock);
|
||||
extern void osq_unlock(struct optimistic_spin_queue **lock);
|
||||
extern bool osq_lock(struct optimistic_spin_queue *lock);
|
||||
extern void osq_unlock(struct optimistic_spin_queue *lock);
|
||||
|
||||
#endif /* __LINUX_MCS_SPINLOCK_H */
|
||||
|
@@ -60,7 +60,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
|
||||
INIT_LIST_HEAD(&lock->wait_list);
|
||||
mutex_clear_owner(lock);
|
||||
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
|
||||
lock->osq = NULL;
|
||||
osq_lock_init(&lock->osq);
|
||||
#endif
|
||||
|
||||
debug_mutex_init(lock, name, key);
|
||||
|
@@ -26,7 +26,7 @@ int rwsem_is_locked(struct rw_semaphore *sem)
|
||||
unsigned long flags;
|
||||
|
||||
if (raw_spin_trylock_irqsave(&sem->wait_lock, flags)) {
|
||||
ret = (sem->activity != 0);
|
||||
ret = (sem->count != 0);
|
||||
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
|
||||
}
|
||||
return ret;
|
||||
@@ -46,7 +46,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name,
|
||||
debug_check_no_locks_freed((void *)sem, sizeof(*sem));
|
||||
lockdep_init_map(&sem->dep_map, name, key, 0);
|
||||
#endif
|
||||
sem->activity = 0;
|
||||
sem->count = 0;
|
||||
raw_spin_lock_init(&sem->wait_lock);
|
||||
INIT_LIST_HEAD(&sem->wait_list);
|
||||
}
|
||||
@@ -95,7 +95,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
|
||||
waiter = list_entry(next, struct rwsem_waiter, list);
|
||||
} while (waiter->type != RWSEM_WAITING_FOR_WRITE);
|
||||
|
||||
sem->activity += woken;
|
||||
sem->count += woken;
|
||||
|
||||
out:
|
||||
return sem;
|
||||
@@ -126,9 +126,9 @@ void __sched __down_read(struct rw_semaphore *sem)
|
||||
|
||||
raw_spin_lock_irqsave(&sem->wait_lock, flags);
|
||||
|
||||
if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
|
||||
if (sem->count >= 0 && list_empty(&sem->wait_list)) {
|
||||
/* granted */
|
||||
sem->activity++;
|
||||
sem->count++;
|
||||
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
|
||||
goto out;
|
||||
}
|
||||
@@ -170,9 +170,9 @@ int __down_read_trylock(struct rw_semaphore *sem)
|
||||
|
||||
raw_spin_lock_irqsave(&sem->wait_lock, flags);
|
||||
|
||||
if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
|
||||
if (sem->count >= 0 && list_empty(&sem->wait_list)) {
|
||||
/* granted */
|
||||
sem->activity++;
|
||||
sem->count++;
|
||||
ret = 1;
|
||||
}
|
||||
|
||||
@@ -206,7 +206,7 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
|
||||
* itself into sleep and waiting for system woke it or someone
|
||||
* else in the head of the wait list up.
|
||||
*/
|
||||
if (sem->activity == 0)
|
||||
if (sem->count == 0)
|
||||
break;
|
||||
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
|
||||
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
|
||||
@@ -214,7 +214,7 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
|
||||
raw_spin_lock_irqsave(&sem->wait_lock, flags);
|
||||
}
|
||||
/* got the lock */
|
||||
sem->activity = -1;
|
||||
sem->count = -1;
|
||||
list_del(&waiter.list);
|
||||
|
||||
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
|
||||
@@ -235,9 +235,9 @@ int __down_write_trylock(struct rw_semaphore *sem)
|
||||
|
||||
raw_spin_lock_irqsave(&sem->wait_lock, flags);
|
||||
|
||||
if (sem->activity == 0) {
|
||||
if (sem->count == 0) {
|
||||
/* got the lock */
|
||||
sem->activity = -1;
|
||||
sem->count = -1;
|
||||
ret = 1;
|
||||
}
|
||||
|
||||
@@ -255,7 +255,7 @@ void __up_read(struct rw_semaphore *sem)
|
||||
|
||||
raw_spin_lock_irqsave(&sem->wait_lock, flags);
|
||||
|
||||
if (--sem->activity == 0 && !list_empty(&sem->wait_list))
|
||||
if (--sem->count == 0 && !list_empty(&sem->wait_list))
|
||||
sem = __rwsem_wake_one_writer(sem);
|
||||
|
||||
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
|
||||
@@ -270,7 +270,7 @@ void __up_write(struct rw_semaphore *sem)
|
||||
|
||||
raw_spin_lock_irqsave(&sem->wait_lock, flags);
|
||||
|
||||
sem->activity = 0;
|
||||
sem->count = 0;
|
||||
if (!list_empty(&sem->wait_list))
|
||||
sem = __rwsem_do_wake(sem, 1);
|
||||
|
||||
@@ -287,7 +287,7 @@ void __downgrade_write(struct rw_semaphore *sem)
|
||||
|
||||
raw_spin_lock_irqsave(&sem->wait_lock, flags);
|
||||
|
||||
sem->activity = 1;
|
||||
sem->count = 1;
|
||||
if (!list_empty(&sem->wait_list))
|
||||
sem = __rwsem_do_wake(sem, 0);
|
||||
|
||||
|
@@ -82,9 +82,9 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name,
|
||||
sem->count = RWSEM_UNLOCKED_VALUE;
|
||||
raw_spin_lock_init(&sem->wait_lock);
|
||||
INIT_LIST_HEAD(&sem->wait_list);
|
||||
#ifdef CONFIG_SMP
|
||||
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
|
||||
sem->owner = NULL;
|
||||
sem->osq = NULL;
|
||||
osq_lock_init(&sem->osq);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -262,7 +262,7 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
|
||||
return false;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
|
||||
/*
|
||||
* Try to acquire write lock before the writer has been put on wait queue.
|
||||
*/
|
||||
@@ -285,10 +285,10 @@ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
|
||||
static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
|
||||
{
|
||||
struct task_struct *owner;
|
||||
bool on_cpu = true;
|
||||
bool on_cpu = false;
|
||||
|
||||
if (need_resched())
|
||||
return 0;
|
||||
return false;
|
||||
|
||||
rcu_read_lock();
|
||||
owner = ACCESS_ONCE(sem->owner);
|
||||
@@ -297,9 +297,9 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
|
||||
rcu_read_unlock();
|
||||
|
||||
/*
|
||||
* If sem->owner is not set, the rwsem owner may have
|
||||
* just acquired it and not set the owner yet or the rwsem
|
||||
* has been released.
|
||||
* If sem->owner is not set, yet we have just recently entered the
|
||||
* slowpath, then there is a possibility reader(s) may have the lock.
|
||||
* To be safe, avoid spinning in these situations.
|
||||
*/
|
||||
return on_cpu;
|
||||
}
|
||||
|
@@ -12,7 +12,7 @@
|
||||
|
||||
#include <linux/atomic.h>
|
||||
|
||||
#if defined(CONFIG_SMP) && defined(CONFIG_RWSEM_XCHGADD_ALGORITHM)
|
||||
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
|
||||
static inline void rwsem_set_owner(struct rw_semaphore *sem)
|
||||
{
|
||||
sem->owner = current;
|
||||
|
@@ -186,6 +186,7 @@ void thaw_processes(void)
|
||||
|
||||
printk("Restarting tasks ... ");
|
||||
|
||||
__usermodehelper_set_disable_depth(UMH_FREEZING);
|
||||
thaw_workqueues();
|
||||
|
||||
read_lock(&tasklist_lock);
|
||||
|
@@ -306,7 +306,7 @@ int suspend_devices_and_enter(suspend_state_t state)
|
||||
error = suspend_ops->begin(state);
|
||||
if (error)
|
||||
goto Close;
|
||||
} else if (state == PM_SUSPEND_FREEZE && freeze_ops->begin) {
|
||||
} else if (state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->begin) {
|
||||
error = freeze_ops->begin();
|
||||
if (error)
|
||||
goto Close;
|
||||
@@ -335,7 +335,7 @@ int suspend_devices_and_enter(suspend_state_t state)
|
||||
Close:
|
||||
if (need_suspend_ops(state) && suspend_ops->end)
|
||||
suspend_ops->end();
|
||||
else if (state == PM_SUSPEND_FREEZE && freeze_ops->end)
|
||||
else if (state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->end)
|
||||
freeze_ops->end();
|
||||
|
||||
return error;
|
||||
|
@@ -206,6 +206,70 @@ void rcu_bh_qs(int cpu)
|
||||
rdp->passed_quiesce = 1;
|
||||
}
|
||||
|
||||
static DEFINE_PER_CPU(int, rcu_sched_qs_mask);
|
||||
|
||||
static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
|
||||
.dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
|
||||
.dynticks = ATOMIC_INIT(1),
|
||||
#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
|
||||
.dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE,
|
||||
.dynticks_idle = ATOMIC_INIT(1),
|
||||
#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
|
||||
};
|
||||
|
||||
/*
|
||||
* Let the RCU core know that this CPU has gone through the scheduler,
|
||||
* which is a quiescent state. This is called when the need for a
|
||||
* quiescent state is urgent, so we burn an atomic operation and full
|
||||
* memory barriers to let the RCU core know about it, regardless of what
|
||||
* this CPU might (or might not) do in the near future.
|
||||
*
|
||||
* We inform the RCU core by emulating a zero-duration dyntick-idle
|
||||
* period, which we in turn do by incrementing the ->dynticks counter
|
||||
* by two.
|
||||
*/
|
||||
static void rcu_momentary_dyntick_idle(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct rcu_data *rdp;
|
||||
struct rcu_dynticks *rdtp;
|
||||
int resched_mask;
|
||||
struct rcu_state *rsp;
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
/*
|
||||
* Yes, we can lose flag-setting operations. This is OK, because
|
||||
* the flag will be set again after some delay.
|
||||
*/
|
||||
resched_mask = raw_cpu_read(rcu_sched_qs_mask);
|
||||
raw_cpu_write(rcu_sched_qs_mask, 0);
|
||||
|
||||
/* Find the flavor that needs a quiescent state. */
|
||||
for_each_rcu_flavor(rsp) {
|
||||
rdp = raw_cpu_ptr(rsp->rda);
|
||||
if (!(resched_mask & rsp->flavor_mask))
|
||||
continue;
|
||||
smp_mb(); /* rcu_sched_qs_mask before cond_resched_completed. */
|
||||
if (ACCESS_ONCE(rdp->mynode->completed) !=
|
||||
ACCESS_ONCE(rdp->cond_resched_completed))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Pretend to be momentarily idle for the quiescent state.
|
||||
* This allows the grace-period kthread to record the
|
||||
* quiescent state, with no need for this CPU to do anything
|
||||
* further.
|
||||
*/
|
||||
rdtp = this_cpu_ptr(&rcu_dynticks);
|
||||
smp_mb__before_atomic(); /* Earlier stuff before QS. */
|
||||
atomic_add(2, &rdtp->dynticks); /* QS. */
|
||||
smp_mb__after_atomic(); /* Later stuff after QS. */
|
||||
break;
|
||||
}
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Note a context switch. This is a quiescent state for RCU-sched,
|
||||
* and requires special handling for preemptible RCU.
|
||||
@@ -216,19 +280,12 @@ void rcu_note_context_switch(int cpu)
|
||||
trace_rcu_utilization(TPS("Start context switch"));
|
||||
rcu_sched_qs(cpu);
|
||||
rcu_preempt_note_context_switch(cpu);
|
||||
if (unlikely(raw_cpu_read(rcu_sched_qs_mask)))
|
||||
rcu_momentary_dyntick_idle();
|
||||
trace_rcu_utilization(TPS("End context switch"));
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_note_context_switch);
|
||||
|
||||
static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
|
||||
.dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
|
||||
.dynticks = ATOMIC_INIT(1),
|
||||
#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
|
||||
.dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE,
|
||||
.dynticks_idle = ATOMIC_INIT(1),
|
||||
#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
|
||||
};
|
||||
|
||||
static long blimit = 10; /* Maximum callbacks per rcu_do_batch. */
|
||||
static long qhimark = 10000; /* If this many pending, ignore blimit. */
|
||||
static long qlowmark = 100; /* Once only this many pending, use blimit. */
|
||||
@@ -243,6 +300,13 @@ static ulong jiffies_till_next_fqs = ULONG_MAX;
|
||||
module_param(jiffies_till_first_fqs, ulong, 0644);
|
||||
module_param(jiffies_till_next_fqs, ulong, 0644);
|
||||
|
||||
/*
|
||||
* How long the grace period must be before we start recruiting
|
||||
* quiescent-state help from rcu_note_context_switch().
|
||||
*/
|
||||
static ulong jiffies_till_sched_qs = HZ / 20;
|
||||
module_param(jiffies_till_sched_qs, ulong, 0644);
|
||||
|
||||
static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
|
||||
struct rcu_data *rdp);
|
||||
static void force_qs_rnp(struct rcu_state *rsp,
|
||||
@@ -853,6 +917,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
|
||||
bool *isidle, unsigned long *maxj)
|
||||
{
|
||||
unsigned int curr;
|
||||
int *rcrmp;
|
||||
unsigned int snap;
|
||||
|
||||
curr = (unsigned int)atomic_add_return(0, &rdp->dynticks->dynticks);
|
||||
@@ -893,27 +958,43 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
|
||||
}
|
||||
|
||||
/*
|
||||
* There is a possibility that a CPU in adaptive-ticks state
|
||||
* might run in the kernel with the scheduling-clock tick disabled
|
||||
* for an extended time period. Invoke rcu_kick_nohz_cpu() to
|
||||
* force the CPU to restart the scheduling-clock tick if this
|
||||
* CPU is in this state.
|
||||
* A CPU running for an extended time within the kernel can
|
||||
* delay RCU grace periods. When the CPU is in NO_HZ_FULL mode,
|
||||
* even context-switching back and forth between a pair of
|
||||
* in-kernel CPU-bound tasks cannot advance grace periods.
|
||||
* So if the grace period is old enough, make the CPU pay attention.
|
||||
* Note that the unsynchronized assignments to the per-CPU
|
||||
* rcu_sched_qs_mask variable are safe. Yes, setting of
|
||||
* bits can be lost, but they will be set again on the next
|
||||
* force-quiescent-state pass. So lost bit sets do not result
|
||||
* in incorrect behavior, merely in a grace period lasting
|
||||
* a few jiffies longer than it might otherwise. Because
|
||||
* there are at most four threads involved, and because the
|
||||
* updates are only once every few jiffies, the probability of
|
||||
* lossage (and thus of slight grace-period extension) is
|
||||
* quite low.
|
||||
*
|
||||
* Note that if the jiffies_till_sched_qs boot/sysfs parameter
|
||||
* is set too high, we override with half of the RCU CPU stall
|
||||
* warning delay.
|
||||
*/
|
||||
rcu_kick_nohz_cpu(rdp->cpu);
|
||||
|
||||
/*
|
||||
* Alternatively, the CPU might be running in the kernel
|
||||
* for an extended period of time without a quiescent state.
|
||||
* Attempt to force the CPU through the scheduler to gain the
|
||||
* needed quiescent state, but only if the grace period has gone
|
||||
* on for an uncommonly long time. If there are many stuck CPUs,
|
||||
* we will beat on the first one until it gets unstuck, then move
|
||||
* to the next. Only do this for the primary flavor of RCU.
|
||||
*/
|
||||
if (rdp->rsp == rcu_state_p &&
|
||||
rcrmp = &per_cpu(rcu_sched_qs_mask, rdp->cpu);
|
||||
if (ULONG_CMP_GE(jiffies,
|
||||
rdp->rsp->gp_start + jiffies_till_sched_qs) ||
|
||||
ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) {
|
||||
rdp->rsp->jiffies_resched += 5;
|
||||
resched_cpu(rdp->cpu);
|
||||
if (!(ACCESS_ONCE(*rcrmp) & rdp->rsp->flavor_mask)) {
|
||||
ACCESS_ONCE(rdp->cond_resched_completed) =
|
||||
ACCESS_ONCE(rdp->mynode->completed);
|
||||
smp_mb(); /* ->cond_resched_completed before *rcrmp. */
|
||||
ACCESS_ONCE(*rcrmp) =
|
||||
ACCESS_ONCE(*rcrmp) + rdp->rsp->flavor_mask;
|
||||
resched_cpu(rdp->cpu); /* Force CPU into scheduler. */
|
||||
rdp->rsp->jiffies_resched += 5; /* Enable beating. */
|
||||
} else if (ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) {
|
||||
/* Time to beat on that CPU again! */
|
||||
resched_cpu(rdp->cpu); /* Force CPU into scheduler. */
|
||||
rdp->rsp->jiffies_resched += 5; /* Re-enable beating. */
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -3491,6 +3572,7 @@ static void __init rcu_init_one(struct rcu_state *rsp,
|
||||
"rcu_node_fqs_1",
|
||||
"rcu_node_fqs_2",
|
||||
"rcu_node_fqs_3" }; /* Match MAX_RCU_LVLS */
|
||||
static u8 fl_mask = 0x1;
|
||||
int cpustride = 1;
|
||||
int i;
|
||||
int j;
|
||||
@@ -3509,6 +3591,8 @@ static void __init rcu_init_one(struct rcu_state *rsp,
|
||||
for (i = 1; i < rcu_num_lvls; i++)
|
||||
rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1];
|
||||
rcu_init_levelspread(rsp);
|
||||
rsp->flavor_mask = fl_mask;
|
||||
fl_mask <<= 1;
|
||||
|
||||
/* Initialize the elements themselves, starting from the leaves. */
|
||||
|
||||
|
@@ -307,6 +307,9 @@ struct rcu_data {
|
||||
/* 4) reasons this CPU needed to be kicked by force_quiescent_state */
|
||||
unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */
|
||||
unsigned long offline_fqs; /* Kicked due to being offline. */
|
||||
unsigned long cond_resched_completed;
|
||||
/* Grace period that needs help */
|
||||
/* from cond_resched(). */
|
||||
|
||||
/* 5) __rcu_pending() statistics. */
|
||||
unsigned long n_rcu_pending; /* rcu_pending() calls since boot. */
|
||||
@@ -392,6 +395,7 @@ struct rcu_state {
|
||||
struct rcu_node *level[RCU_NUM_LVLS]; /* Hierarchy levels. */
|
||||
u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */
|
||||
u8 levelspread[RCU_NUM_LVLS]; /* kids/node in each level. */
|
||||
u8 flavor_mask; /* bit in flavor mask. */
|
||||
struct rcu_data __percpu *rda; /* pointer of percpu rcu_data. */
|
||||
void (*call)(struct rcu_head *head, /* call_rcu() flavor. */
|
||||
void (*func)(struct rcu_head *head));
|
||||
@@ -563,7 +567,7 @@ static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp);
|
||||
static void do_nocb_deferred_wakeup(struct rcu_data *rdp);
|
||||
static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
|
||||
static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp);
|
||||
static void rcu_kick_nohz_cpu(int cpu);
|
||||
static void __maybe_unused rcu_kick_nohz_cpu(int cpu);
|
||||
static bool init_nocb_callback_list(struct rcu_data *rdp);
|
||||
static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq);
|
||||
static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq);
|
||||
|
@@ -2404,7 +2404,7 @@ static bool init_nocb_callback_list(struct rcu_data *rdp)
|
||||
* if an adaptive-ticks CPU is failing to respond to the current grace
|
||||
* period and has not been idle from an RCU perspective, kick it.
|
||||
*/
|
||||
static void rcu_kick_nohz_cpu(int cpu)
|
||||
static void __maybe_unused rcu_kick_nohz_cpu(int cpu)
|
||||
{
|
||||
#ifdef CONFIG_NO_HZ_FULL
|
||||
if (tick_nohz_full_cpu(cpu))
|
||||
|
@@ -200,12 +200,12 @@ void wait_rcu_gp(call_rcu_func_t crf)
|
||||
EXPORT_SYMBOL_GPL(wait_rcu_gp);
|
||||
|
||||
#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
|
||||
static inline void debug_init_rcu_head(struct rcu_head *head)
|
||||
void init_rcu_head(struct rcu_head *head)
|
||||
{
|
||||
debug_object_init(head, &rcuhead_debug_descr);
|
||||
}
|
||||
|
||||
static inline void debug_rcu_head_free(struct rcu_head *head)
|
||||
void destroy_rcu_head(struct rcu_head *head)
|
||||
{
|
||||
debug_object_free(head, &rcuhead_debug_descr);
|
||||
}
|
||||
@@ -350,21 +350,3 @@ static int __init check_cpu_stall_init(void)
|
||||
early_initcall(check_cpu_stall_init);
|
||||
|
||||
#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
|
||||
|
||||
/*
|
||||
* Hooks for cond_resched() and friends to avoid RCU CPU stall warnings.
|
||||
*/
|
||||
|
||||
DEFINE_PER_CPU(int, rcu_cond_resched_count);
|
||||
|
||||
/*
|
||||
* Report a set of RCU quiescent states, for use by cond_resched()
|
||||
* and friends. Out of line due to being called infrequently.
|
||||
*/
|
||||
void rcu_resched(void)
|
||||
{
|
||||
preempt_disable();
|
||||
__this_cpu_write(rcu_cond_resched_count, 0);
|
||||
rcu_note_context_switch(smp_processor_id());
|
||||
preempt_enable();
|
||||
}
|
||||
|
@@ -3573,9 +3573,10 @@ static int _sched_setscheduler(struct task_struct *p, int policy,
|
||||
};
|
||||
|
||||
/*
|
||||
* Fixup the legacy SCHED_RESET_ON_FORK hack
|
||||
* Fixup the legacy SCHED_RESET_ON_FORK hack, except if
|
||||
* the policy=-1 was passed by sched_setparam().
|
||||
*/
|
||||
if (policy & SCHED_RESET_ON_FORK) {
|
||||
if ((policy != -1) && (policy & SCHED_RESET_ON_FORK)) {
|
||||
attr.sched_flags |= SCHED_FLAG_RESET_ON_FORK;
|
||||
policy &= ~SCHED_RESET_ON_FORK;
|
||||
attr.sched_policy = policy;
|
||||
@@ -4162,7 +4163,6 @@ static void __cond_resched(void)
|
||||
|
||||
int __sched _cond_resched(void)
|
||||
{
|
||||
rcu_cond_resched();
|
||||
if (should_resched()) {
|
||||
__cond_resched();
|
||||
return 1;
|
||||
@@ -4181,18 +4181,15 @@ EXPORT_SYMBOL(_cond_resched);
|
||||
*/
|
||||
int __cond_resched_lock(spinlock_t *lock)
|
||||
{
|
||||
bool need_rcu_resched = rcu_should_resched();
|
||||
int resched = should_resched();
|
||||
int ret = 0;
|
||||
|
||||
lockdep_assert_held(lock);
|
||||
|
||||
if (spin_needbreak(lock) || resched || need_rcu_resched) {
|
||||
if (spin_needbreak(lock) || resched) {
|
||||
spin_unlock(lock);
|
||||
if (resched)
|
||||
__cond_resched();
|
||||
else if (unlikely(need_rcu_resched))
|
||||
rcu_resched();
|
||||
else
|
||||
cpu_relax();
|
||||
ret = 1;
|
||||
@@ -4206,7 +4203,6 @@ int __sched __cond_resched_softirq(void)
|
||||
{
|
||||
BUG_ON(!in_softirq());
|
||||
|
||||
rcu_cond_resched(); /* BH disabled OK, just recording QSes. */
|
||||
if (should_resched()) {
|
||||
local_bh_enable();
|
||||
__cond_resched();
|
||||
|
@@ -608,7 +608,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
|
||||
|
||||
avg_atom = p->se.sum_exec_runtime;
|
||||
if (nr_switches)
|
||||
do_div(avg_atom, nr_switches);
|
||||
avg_atom = div64_ul(avg_atom, nr_switches);
|
||||
else
|
||||
avg_atom = -1LL;
|
||||
|
||||
|
@@ -585,9 +585,14 @@ static int alarm_timer_set(struct k_itimer *timr, int flags,
|
||||
struct itimerspec *new_setting,
|
||||
struct itimerspec *old_setting)
|
||||
{
|
||||
ktime_t exp;
|
||||
|
||||
if (!rtcdev)
|
||||
return -ENOTSUPP;
|
||||
|
||||
if (flags & ~TIMER_ABSTIME)
|
||||
return -EINVAL;
|
||||
|
||||
if (old_setting)
|
||||
alarm_timer_get(timr, old_setting);
|
||||
|
||||
@@ -597,8 +602,16 @@ static int alarm_timer_set(struct k_itimer *timr, int flags,
|
||||
|
||||
/* start the timer */
|
||||
timr->it.alarm.interval = timespec_to_ktime(new_setting->it_interval);
|
||||
alarm_start(&timr->it.alarm.alarmtimer,
|
||||
timespec_to_ktime(new_setting->it_value));
|
||||
exp = timespec_to_ktime(new_setting->it_value);
|
||||
/* Convert (if necessary) to absolute time */
|
||||
if (flags != TIMER_ABSTIME) {
|
||||
ktime_t now;
|
||||
|
||||
now = alarm_bases[timr->it.alarm.alarmtimer.type].gettime();
|
||||
exp = ktime_add(now, exp);
|
||||
}
|
||||
|
||||
alarm_start(&timr->it.alarm.alarmtimer, exp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -730,6 +743,9 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags,
|
||||
if (!alarmtimer_get_rtcdev())
|
||||
return -ENOTSUPP;
|
||||
|
||||
if (flags & ~TIMER_ABSTIME)
|
||||
return -EINVAL;
|
||||
|
||||
if (!capable(CAP_WAKE_ALARM))
|
||||
return -EPERM;
|
||||
|
||||
|
@@ -265,12 +265,12 @@ static void update_ftrace_function(void)
|
||||
func = ftrace_ops_list_func;
|
||||
}
|
||||
|
||||
update_function_graph_func();
|
||||
|
||||
/* If there's no change, then do nothing more here */
|
||||
if (ftrace_trace_function == func)
|
||||
return;
|
||||
|
||||
update_function_graph_func();
|
||||
|
||||
/*
|
||||
* If we are using the list function, it doesn't care
|
||||
* about the function_trace_ops.
|
||||
|
@@ -616,10 +616,6 @@ int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
|
||||
struct ring_buffer_per_cpu *cpu_buffer;
|
||||
struct rb_irq_work *work;
|
||||
|
||||
if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
|
||||
(cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
|
||||
return POLLIN | POLLRDNORM;
|
||||
|
||||
if (cpu == RING_BUFFER_ALL_CPUS)
|
||||
work = &buffer->irq_work;
|
||||
else {
|
||||
|
@@ -466,6 +466,12 @@ int __trace_puts(unsigned long ip, const char *str, int size)
|
||||
struct print_entry *entry;
|
||||
unsigned long irq_flags;
|
||||
int alloc;
|
||||
int pc;
|
||||
|
||||
if (!(trace_flags & TRACE_ITER_PRINTK))
|
||||
return 0;
|
||||
|
||||
pc = preempt_count();
|
||||
|
||||
if (unlikely(tracing_selftest_running || tracing_disabled))
|
||||
return 0;
|
||||
@@ -475,7 +481,7 @@ int __trace_puts(unsigned long ip, const char *str, int size)
|
||||
local_save_flags(irq_flags);
|
||||
buffer = global_trace.trace_buffer.buffer;
|
||||
event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
|
||||
irq_flags, preempt_count());
|
||||
irq_flags, pc);
|
||||
if (!event)
|
||||
return 0;
|
||||
|
||||
@@ -492,6 +498,7 @@ int __trace_puts(unsigned long ip, const char *str, int size)
|
||||
entry->buf[size] = '\0';
|
||||
|
||||
__buffer_unlock_commit(buffer, event);
|
||||
ftrace_trace_stack(buffer, irq_flags, 4, pc);
|
||||
|
||||
return size;
|
||||
}
|
||||
@@ -509,6 +516,12 @@ int __trace_bputs(unsigned long ip, const char *str)
|
||||
struct bputs_entry *entry;
|
||||
unsigned long irq_flags;
|
||||
int size = sizeof(struct bputs_entry);
|
||||
int pc;
|
||||
|
||||
if (!(trace_flags & TRACE_ITER_PRINTK))
|
||||
return 0;
|
||||
|
||||
pc = preempt_count();
|
||||
|
||||
if (unlikely(tracing_selftest_running || tracing_disabled))
|
||||
return 0;
|
||||
@@ -516,7 +529,7 @@ int __trace_bputs(unsigned long ip, const char *str)
|
||||
local_save_flags(irq_flags);
|
||||
buffer = global_trace.trace_buffer.buffer;
|
||||
event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
|
||||
irq_flags, preempt_count());
|
||||
irq_flags, pc);
|
||||
if (!event)
|
||||
return 0;
|
||||
|
||||
@@ -525,6 +538,7 @@ int __trace_bputs(unsigned long ip, const char *str)
|
||||
entry->str = str;
|
||||
|
||||
__buffer_unlock_commit(buffer, event);
|
||||
ftrace_trace_stack(buffer, irq_flags, 4, pc);
|
||||
|
||||
return 1;
|
||||
}
|
||||
@@ -809,7 +823,7 @@ static struct {
|
||||
{ trace_clock_local, "local", 1 },
|
||||
{ trace_clock_global, "global", 1 },
|
||||
{ trace_clock_counter, "counter", 0 },
|
||||
{ trace_clock_jiffies, "uptime", 1 },
|
||||
{ trace_clock_jiffies, "uptime", 0 },
|
||||
{ trace_clock, "perf", 1 },
|
||||
ARCH_TRACE_CLOCKS
|
||||
};
|
||||
|
@@ -59,13 +59,14 @@ u64 notrace trace_clock(void)
|
||||
|
||||
/*
|
||||
* trace_clock_jiffies(): Simply use jiffies as a clock counter.
|
||||
* Note that this use of jiffies_64 is not completely safe on
|
||||
* 32-bit systems. But the window is tiny, and the effect if
|
||||
* we are affected is that we will have an obviously bogus
|
||||
* timestamp on a trace event - i.e. not life threatening.
|
||||
*/
|
||||
u64 notrace trace_clock_jiffies(void)
|
||||
{
|
||||
u64 jiffy = jiffies - INITIAL_JIFFIES;
|
||||
|
||||
/* Return nsecs */
|
||||
return (u64)jiffies_to_usecs(jiffy) * 1000ULL;
|
||||
return jiffies_64_to_clock_t(jiffies_64 - INITIAL_JIFFIES);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@@ -470,6 +470,7 @@ static void remove_event_file_dir(struct ftrace_event_file *file)
|
||||
|
||||
list_del(&file->list);
|
||||
remove_subsystem(file->system);
|
||||
free_event_filter(file->filter);
|
||||
kmem_cache_free(file_cachep, file);
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user