Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RCU updates from Ingo Molnar: "The main RCU related changes in this cycle were: - Removal of spin_unlock_wait() - SRCU updates - RCU torture-test updates - RCU Documentation updates - Extend the sys_membarrier() ABI with the MEMBARRIER_CMD_PRIVATE_EXPEDITED variant - Miscellaneous RCU fixes - CPU-hotplug fixes" * 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (63 commits) arch: Remove spin_unlock_wait() arch-specific definitions locking: Remove spin_unlock_wait() generic definitions drivers/ata: Replace spin_unlock_wait() with lock/unlock pair ipc: Replace spin_unlock_wait() with lock/unlock pair exit: Replace spin_unlock_wait() with lock/unlock pair completion: Replace spin_unlock_wait() with lock/unlock pair doc: Set down RCU's scheduling-clock-interrupt needs doc: No longer allowed to use rcu_dereference on non-pointers doc: Add RCU files to docbook-generation files doc: Update memory-barriers.txt for read-to-write dependencies doc: Update RCU documentation membarrier: Provide expedited private command rcu: Remove exports from rcu_idle_exit() and rcu_idle_enter() rcu: Add warning to rcu_idle_enter() for irqs enabled rcu: Make rcu_idle_enter() rely on callers disabling irqs rcu: Add assertions verifying blocked-tasks list rcu/tracing: Set disable_rcu_irq_enter on rcu_eqs_exit() rcu: Add TPS() protection for _rcu_barrier_trace strings rcu: Use idle versions of swait to make idle-hack clear swait: Add idle variants which don't contribute to load average ...
This commit is contained in:
@@ -108,7 +108,6 @@ obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
|
||||
obj-$(CONFIG_JUMP_LABEL) += jump_label.o
|
||||
obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o
|
||||
obj-$(CONFIG_TORTURE_TEST) += torture.o
|
||||
obj-$(CONFIG_MEMBARRIER) += membarrier.o
|
||||
|
||||
obj-$(CONFIG_HAS_IOMEM) += memremap.o
|
||||
|
||||
|
@@ -650,6 +650,7 @@ static int takedown_cpu(unsigned int cpu)
|
||||
__cpu_die(cpu);
|
||||
|
||||
tick_cleanup_dead_cpu(cpu);
|
||||
rcutree_migrate_callbacks(cpu);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -764,7 +764,6 @@ void __noreturn do_exit(long code)
|
||||
{
|
||||
struct task_struct *tsk = current;
|
||||
int group_dead;
|
||||
TASKS_RCU(int tasks_rcu_i);
|
||||
|
||||
profile_task_exit(tsk);
|
||||
kcov_task_exit(tsk);
|
||||
@@ -819,7 +818,8 @@ void __noreturn do_exit(long code)
|
||||
* Ensure that we must observe the pi_state in exit_mm() ->
|
||||
* mm_release() -> exit_pi_state_list().
|
||||
*/
|
||||
raw_spin_unlock_wait(&tsk->pi_lock);
|
||||
raw_spin_lock_irq(&tsk->pi_lock);
|
||||
raw_spin_unlock_irq(&tsk->pi_lock);
|
||||
|
||||
if (unlikely(in_atomic())) {
|
||||
pr_info("note: %s[%d] exited with preempt_count %d\n",
|
||||
@@ -881,9 +881,7 @@ void __noreturn do_exit(long code)
|
||||
*/
|
||||
flush_ptrace_hw_breakpoint(tsk);
|
||||
|
||||
TASKS_RCU(preempt_disable());
|
||||
TASKS_RCU(tasks_rcu_i = __srcu_read_lock(&tasks_rcu_exit_srcu));
|
||||
TASKS_RCU(preempt_enable());
|
||||
exit_tasks_rcu_start();
|
||||
exit_notify(tsk, group_dead);
|
||||
proc_exit_connector(tsk);
|
||||
mpol_put_task_policy(tsk);
|
||||
@@ -918,7 +916,7 @@ void __noreturn do_exit(long code)
|
||||
if (tsk->nr_dirtied)
|
||||
__this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied);
|
||||
exit_rcu();
|
||||
TASKS_RCU(__srcu_read_unlock(&tasks_rcu_exit_srcu, tasks_rcu_i));
|
||||
exit_tasks_rcu_finish();
|
||||
|
||||
do_task_dead();
|
||||
}
|
||||
|
@@ -268,123 +268,6 @@ static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock,
|
||||
#define queued_spin_lock_slowpath native_queued_spin_lock_slowpath
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Various notes on spin_is_locked() and spin_unlock_wait(), which are
|
||||
* 'interesting' functions:
|
||||
*
|
||||
* PROBLEM: some architectures have an interesting issue with atomic ACQUIRE
|
||||
* operations in that the ACQUIRE applies to the LOAD _not_ the STORE (ARM64,
|
||||
* PPC). Also qspinlock has a similar issue per construction, the setting of
|
||||
* the locked byte can be unordered acquiring the lock proper.
|
||||
*
|
||||
* This gets to be 'interesting' in the following cases, where the /should/s
|
||||
* end up false because of this issue.
|
||||
*
|
||||
*
|
||||
* CASE 1:
|
||||
*
|
||||
* So the spin_is_locked() correctness issue comes from something like:
|
||||
*
|
||||
* CPU0 CPU1
|
||||
*
|
||||
* global_lock(); local_lock(i)
|
||||
* spin_lock(&G) spin_lock(&L[i])
|
||||
* for (i) if (!spin_is_locked(&G)) {
|
||||
* spin_unlock_wait(&L[i]); smp_acquire__after_ctrl_dep();
|
||||
* return;
|
||||
* }
|
||||
* // deal with fail
|
||||
*
|
||||
* Where it is important CPU1 sees G locked or CPU0 sees L[i] locked such
|
||||
* that there is exclusion between the two critical sections.
|
||||
*
|
||||
* The load from spin_is_locked(&G) /should/ be constrained by the ACQUIRE from
|
||||
* spin_lock(&L[i]), and similarly the load(s) from spin_unlock_wait(&L[i])
|
||||
* /should/ be constrained by the ACQUIRE from spin_lock(&G).
|
||||
*
|
||||
* Similarly, later stuff is constrained by the ACQUIRE from CTRL+RMB.
|
||||
*
|
||||
*
|
||||
* CASE 2:
|
||||
*
|
||||
* For spin_unlock_wait() there is a second correctness issue, namely:
|
||||
*
|
||||
* CPU0 CPU1
|
||||
*
|
||||
* flag = set;
|
||||
* smp_mb(); spin_lock(&l)
|
||||
* spin_unlock_wait(&l); if (!flag)
|
||||
* // add to lockless list
|
||||
* spin_unlock(&l);
|
||||
* // iterate lockless list
|
||||
*
|
||||
* Which wants to ensure that CPU1 will stop adding bits to the list and CPU0
|
||||
* will observe the last entry on the list (if spin_unlock_wait() had ACQUIRE
|
||||
* semantics etc..)
|
||||
*
|
||||
* Where flag /should/ be ordered against the locked store of l.
|
||||
*/
|
||||
|
||||
/*
|
||||
* queued_spin_lock_slowpath() can (load-)ACQUIRE the lock before
|
||||
* issuing an _unordered_ store to set _Q_LOCKED_VAL.
|
||||
*
|
||||
* This means that the store can be delayed, but no later than the
|
||||
* store-release from the unlock. This means that simply observing
|
||||
* _Q_LOCKED_VAL is not sufficient to determine if the lock is acquired.
|
||||
*
|
||||
* There are two paths that can issue the unordered store:
|
||||
*
|
||||
* (1) clear_pending_set_locked(): *,1,0 -> *,0,1
|
||||
*
|
||||
* (2) set_locked(): t,0,0 -> t,0,1 ; t != 0
|
||||
* atomic_cmpxchg_relaxed(): t,0,0 -> 0,0,1
|
||||
*
|
||||
* However, in both cases we have other !0 state we've set before to queue
|
||||
* ourselves:
|
||||
*
|
||||
* For (1) we have the atomic_cmpxchg_acquire() that set _Q_PENDING_VAL, our
|
||||
* load is constrained by that ACQUIRE to not pass before that, and thus must
|
||||
* observe the store.
|
||||
*
|
||||
* For (2) we have a more interesting scenario. We enqueue ourselves using
|
||||
* xchg_tail(), which ends up being a RELEASE. This in itself is not
|
||||
* sufficient, however that is followed by an smp_cond_acquire() on the same
|
||||
* word, giving a RELEASE->ACQUIRE ordering. This again constrains our load and
|
||||
* guarantees we must observe that store.
|
||||
*
|
||||
* Therefore both cases have other !0 state that is observable before the
|
||||
* unordered locked byte store comes through. This means we can use that to
|
||||
* wait for the lock store, and then wait for an unlock.
|
||||
*/
|
||||
#ifndef queued_spin_unlock_wait
|
||||
void queued_spin_unlock_wait(struct qspinlock *lock)
|
||||
{
|
||||
u32 val;
|
||||
|
||||
for (;;) {
|
||||
val = atomic_read(&lock->val);
|
||||
|
||||
if (!val) /* not locked, we're done */
|
||||
goto done;
|
||||
|
||||
if (val & _Q_LOCKED_MASK) /* locked, go wait for unlock */
|
||||
break;
|
||||
|
||||
/* not locked, but pending, wait until we observe the lock */
|
||||
cpu_relax();
|
||||
}
|
||||
|
||||
/* any unlock is good */
|
||||
while (atomic_read(&lock->val) & _Q_LOCKED_MASK)
|
||||
cpu_relax();
|
||||
|
||||
done:
|
||||
smp_acquire__after_ctrl_dep();
|
||||
}
|
||||
EXPORT_SYMBOL(queued_spin_unlock_wait);
|
||||
#endif
|
||||
|
||||
#endif /* _GEN_PV_LOCK_SLOWPATH */
|
||||
|
||||
/**
|
||||
|
@@ -1,70 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2010, 2015 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
|
||||
*
|
||||
* membarrier system call
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/membarrier.h>
|
||||
#include <linux/tick.h>
|
||||
|
||||
/*
|
||||
* Bitmask made from a "or" of all commands within enum membarrier_cmd,
|
||||
* except MEMBARRIER_CMD_QUERY.
|
||||
*/
|
||||
#define MEMBARRIER_CMD_BITMASK (MEMBARRIER_CMD_SHARED)
|
||||
|
||||
/**
|
||||
* sys_membarrier - issue memory barriers on a set of threads
|
||||
* @cmd: Takes command values defined in enum membarrier_cmd.
|
||||
* @flags: Currently needs to be 0. For future extensions.
|
||||
*
|
||||
* If this system call is not implemented, -ENOSYS is returned. If the
|
||||
* command specified does not exist, or if the command argument is invalid,
|
||||
* this system call returns -EINVAL. For a given command, with flags argument
|
||||
* set to 0, this system call is guaranteed to always return the same value
|
||||
* until reboot.
|
||||
*
|
||||
* All memory accesses performed in program order from each targeted thread
|
||||
* are guaranteed to be ordered with respect to sys_membarrier(). If we use
|
||||
* the semantic "barrier()" to represent a compiler barrier forcing memory
|
||||
* accesses to be performed in program order across the barrier, and
|
||||
* smp_mb() to represent explicit memory barriers forcing full memory
|
||||
* ordering across the barrier, we have the following ordering table for
|
||||
* each pair of barrier(), sys_membarrier() and smp_mb():
|
||||
*
|
||||
* The pair ordering is detailed as (O: ordered, X: not ordered):
|
||||
*
|
||||
* barrier() smp_mb() sys_membarrier()
|
||||
* barrier() X X O
|
||||
* smp_mb() X O O
|
||||
* sys_membarrier() O O O
|
||||
*/
|
||||
SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
{
	/* MEMBARRIER_CMD_SHARED is not compatible with nohz_full. */
	if (tick_nohz_full_enabled())
		return -ENOSYS;

	/* No flag bits are accepted; reject any non-zero value. */
	if (unlikely(flags))
		return -EINVAL;

	/* Query: report the bitmask of supported commands. */
	if (cmd == MEMBARRIER_CMD_QUERY)
		return MEMBARRIER_CMD_BITMASK;

	/* Anything other than the shared barrier is unknown here. */
	if (cmd != MEMBARRIER_CMD_SHARED)
		return -EINVAL;

	/* Only wait for a sched grace period when more than one CPU is online. */
	if (num_online_cpus() > 1)
		synchronize_sched();
	return 0;
}
|
@@ -69,8 +69,7 @@ config TREE_SRCU
|
||||
This option selects the full-fledged version of SRCU.
|
||||
|
||||
config TASKS_RCU
|
||||
bool
|
||||
default n
|
||||
def_bool PREEMPT
|
||||
select SRCU
|
||||
help
|
||||
This option enables a task-based RCU implementation that uses
|
||||
|
128
kernel/rcu/rcu.h
128
kernel/rcu/rcu.h
@@ -356,22 +356,10 @@ do { \
|
||||
|
||||
#ifdef CONFIG_TINY_RCU
|
||||
/* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */
|
||||
static inline bool rcu_gp_is_normal(void) /* Internal RCU use. */
|
||||
{
|
||||
return true;
|
||||
}
|
||||
static inline bool rcu_gp_is_expedited(void) /* Internal RCU use. */
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline void rcu_expedite_gp(void)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void rcu_unexpedite_gp(void)
|
||||
{
|
||||
}
|
||||
static inline bool rcu_gp_is_normal(void) { return true; }
|
||||
static inline bool rcu_gp_is_expedited(void) { return false; }
|
||||
static inline void rcu_expedite_gp(void) { }
|
||||
static inline void rcu_unexpedite_gp(void) { }
|
||||
#else /* #ifdef CONFIG_TINY_RCU */
|
||||
bool rcu_gp_is_normal(void); /* Internal RCU use. */
|
||||
bool rcu_gp_is_expedited(void); /* Internal RCU use. */
|
||||
@@ -419,12 +407,8 @@ static inline void rcutorture_get_gp_data(enum rcutorture_type test_type,
|
||||
*gpnum = 0;
|
||||
*completed = 0;
|
||||
}
|
||||
static inline void rcutorture_record_test_transition(void)
|
||||
{
|
||||
}
|
||||
static inline void rcutorture_record_progress(unsigned long vernum)
|
||||
{
|
||||
}
|
||||
static inline void rcutorture_record_test_transition(void) { }
|
||||
static inline void rcutorture_record_progress(unsigned long vernum) { }
|
||||
#ifdef CONFIG_RCU_TRACE
|
||||
void do_trace_rcu_torture_read(const char *rcutorturename,
|
||||
struct rcu_head *rhp,
|
||||
@@ -460,92 +444,20 @@ void srcutorture_get_gp_data(enum rcutorture_type test_type,
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_TINY_RCU
|
||||
|
||||
/*
|
||||
* Return the number of grace periods started.
|
||||
*/
|
||||
static inline unsigned long rcu_batches_started(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the number of bottom-half grace periods started.
|
||||
*/
|
||||
static inline unsigned long rcu_batches_started_bh(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the number of sched grace periods started.
|
||||
*/
|
||||
static inline unsigned long rcu_batches_started_sched(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the number of grace periods completed.
|
||||
*/
|
||||
static inline unsigned long rcu_batches_completed(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the number of bottom-half grace periods completed.
|
||||
*/
|
||||
static inline unsigned long rcu_batches_completed_bh(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the number of sched grace periods completed.
|
||||
*/
|
||||
static inline unsigned long rcu_batches_completed_sched(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the number of expedited grace periods completed.
|
||||
*/
|
||||
static inline unsigned long rcu_exp_batches_completed(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the number of expedited sched grace periods completed.
|
||||
*/
|
||||
static inline unsigned long rcu_exp_batches_completed_sched(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline unsigned long srcu_batches_completed(struct srcu_struct *sp)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void rcu_force_quiescent_state(void)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void rcu_bh_force_quiescent_state(void)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void rcu_sched_force_quiescent_state(void)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void show_rcu_gp_kthreads(void)
|
||||
{
|
||||
}
|
||||
|
||||
static inline unsigned long rcu_batches_started(void) { return 0; }
|
||||
static inline unsigned long rcu_batches_started_bh(void) { return 0; }
|
||||
static inline unsigned long rcu_batches_started_sched(void) { return 0; }
|
||||
static inline unsigned long rcu_batches_completed(void) { return 0; }
|
||||
static inline unsigned long rcu_batches_completed_bh(void) { return 0; }
|
||||
static inline unsigned long rcu_batches_completed_sched(void) { return 0; }
|
||||
static inline unsigned long rcu_exp_batches_completed(void) { return 0; }
|
||||
static inline unsigned long rcu_exp_batches_completed_sched(void) { return 0; }
|
||||
static inline unsigned long
|
||||
srcu_batches_completed(struct srcu_struct *sp) { return 0; }
|
||||
static inline void rcu_force_quiescent_state(void) { }
|
||||
static inline void rcu_bh_force_quiescent_state(void) { }
|
||||
static inline void rcu_sched_force_quiescent_state(void) { }
|
||||
static inline void show_rcu_gp_kthreads(void) { }
|
||||
#else /* #ifdef CONFIG_TINY_RCU */
|
||||
extern unsigned long rcutorture_testseq;
|
||||
extern unsigned long rcutorture_vernum;
|
||||
|
@@ -35,24 +35,6 @@ void rcu_cblist_init(struct rcu_cblist *rclp)
|
||||
rclp->len_lazy = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Debug function to actually count the number of callbacks.
|
||||
* If the number exceeds the limit specified, return -1.
|
||||
*/
|
||||
long rcu_cblist_count_cbs(struct rcu_cblist *rclp, long lim)
|
||||
{
|
||||
int cnt = 0;
|
||||
struct rcu_head **rhpp = &rclp->head;
|
||||
|
||||
for (;;) {
|
||||
if (!*rhpp)
|
||||
return cnt;
|
||||
if (++cnt > lim)
|
||||
return -1;
|
||||
rhpp = &(*rhpp)->next;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Dequeue the oldest rcu_head structure from the specified callback
|
||||
* list. This function assumes that the callback is non-lazy, but
|
||||
@@ -102,17 +84,6 @@ void rcu_segcblist_disable(struct rcu_segcblist *rsclp)
|
||||
rsclp->tails[RCU_NEXT_TAIL] = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Is the specified segment of the specified rcu_segcblist structure
|
||||
* empty of callbacks?
|
||||
*/
|
||||
bool rcu_segcblist_segempty(struct rcu_segcblist *rsclp, int seg)
|
||||
{
|
||||
if (seg == RCU_DONE_TAIL)
|
||||
return &rsclp->head == rsclp->tails[RCU_DONE_TAIL];
|
||||
return rsclp->tails[seg - 1] == rsclp->tails[seg];
|
||||
}
|
||||
|
||||
/*
|
||||
* Does the specified rcu_segcblist structure contain callbacks that
|
||||
* are ready to be invoked?
|
||||
@@ -133,50 +104,6 @@ bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp)
|
||||
!rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Dequeue and return the first ready-to-invoke callback. If there
|
||||
* are no ready-to-invoke callbacks, return NULL. Disables interrupts
|
||||
* to avoid interference. Does not protect from interference from other
|
||||
* CPUs or tasks.
|
||||
*/
|
||||
struct rcu_head *rcu_segcblist_dequeue(struct rcu_segcblist *rsclp)
|
||||
{
|
||||
unsigned long flags;
|
||||
int i;
|
||||
struct rcu_head *rhp;
|
||||
|
||||
local_irq_save(flags);
|
||||
if (!rcu_segcblist_ready_cbs(rsclp)) {
|
||||
local_irq_restore(flags);
|
||||
return NULL;
|
||||
}
|
||||
rhp = rsclp->head;
|
||||
BUG_ON(!rhp);
|
||||
rsclp->head = rhp->next;
|
||||
for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++) {
|
||||
if (rsclp->tails[i] != &rhp->next)
|
||||
break;
|
||||
rsclp->tails[i] = &rsclp->head;
|
||||
}
|
||||
smp_mb(); /* Dequeue before decrement for rcu_barrier(). */
|
||||
WRITE_ONCE(rsclp->len, rsclp->len - 1);
|
||||
local_irq_restore(flags);
|
||||
return rhp;
|
||||
}
|
||||
|
||||
/*
|
||||
* Account for the fact that a previously dequeued callback turned out
|
||||
* to be marked as lazy.
|
||||
*/
|
||||
void rcu_segcblist_dequeued_lazy(struct rcu_segcblist *rsclp)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
local_irq_save(flags);
|
||||
rsclp->len_lazy--;
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return a pointer to the first callback in the specified rcu_segcblist
|
||||
* structure. This is useful for diagnostics.
|
||||
@@ -202,17 +129,6 @@ struct rcu_head *rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Does the specified rcu_segcblist structure contain callbacks that
|
||||
* have not yet been processed beyond having been posted, that is,
|
||||
* does it contain callbacks in its last segment?
|
||||
*/
|
||||
bool rcu_segcblist_new_cbs(struct rcu_segcblist *rsclp)
|
||||
{
|
||||
return rcu_segcblist_is_enabled(rsclp) &&
|
||||
!rcu_segcblist_restempty(rsclp, RCU_NEXT_READY_TAIL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Enqueue the specified callback onto the specified rcu_segcblist
|
||||
* structure, updating accounting as needed. Note that the ->len
|
||||
@@ -503,3 +419,27 @@ bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp,
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Merge the source rcu_segcblist structure into the destination
|
||||
* rcu_segcblist structure, then initialize the source. Any pending
|
||||
* callbacks from the source get to start over. It is best to
|
||||
* advance and accelerate both the destination and the source
|
||||
* before merging.
|
||||
*/
|
||||
void rcu_segcblist_merge(struct rcu_segcblist *dst_rsclp,
|
||||
struct rcu_segcblist *src_rsclp)
|
||||
{
|
||||
struct rcu_cblist donecbs;
|
||||
struct rcu_cblist pendcbs;
|
||||
|
||||
rcu_cblist_init(&donecbs);
|
||||
rcu_cblist_init(&pendcbs);
|
||||
rcu_segcblist_extract_count(src_rsclp, &donecbs);
|
||||
rcu_segcblist_extract_done_cbs(src_rsclp, &donecbs);
|
||||
rcu_segcblist_extract_pend_cbs(src_rsclp, &pendcbs);
|
||||
rcu_segcblist_insert_count(dst_rsclp, &donecbs);
|
||||
rcu_segcblist_insert_done_cbs(dst_rsclp, &donecbs);
|
||||
rcu_segcblist_insert_pend_cbs(dst_rsclp, &pendcbs);
|
||||
rcu_segcblist_init(src_rsclp);
|
||||
}
|
||||
|
@@ -31,29 +31,7 @@ static inline void rcu_cblist_dequeued_lazy(struct rcu_cblist *rclp)
|
||||
rclp->len_lazy--;
|
||||
}
|
||||
|
||||
/*
|
||||
* Interim function to return rcu_cblist head pointer. Longer term, the
|
||||
* rcu_cblist will be used more pervasively, removing the need for this
|
||||
* function.
|
||||
*/
|
||||
static inline struct rcu_head *rcu_cblist_head(struct rcu_cblist *rclp)
|
||||
{
|
||||
return rclp->head;
|
||||
}
|
||||
|
||||
/*
|
||||
* Interim function to return rcu_cblist head pointer. Longer term, the
|
||||
* rcu_cblist will be used more pervasively, removing the need for this
|
||||
* function.
|
||||
*/
|
||||
static inline struct rcu_head **rcu_cblist_tail(struct rcu_cblist *rclp)
|
||||
{
|
||||
WARN_ON_ONCE(!rclp->head);
|
||||
return rclp->tail;
|
||||
}
|
||||
|
||||
void rcu_cblist_init(struct rcu_cblist *rclp);
|
||||
long rcu_cblist_count_cbs(struct rcu_cblist *rclp, long lim);
|
||||
struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp);
|
||||
|
||||
/*
|
||||
@@ -134,14 +112,10 @@ static inline struct rcu_head **rcu_segcblist_tail(struct rcu_segcblist *rsclp)
|
||||
|
||||
void rcu_segcblist_init(struct rcu_segcblist *rsclp);
|
||||
void rcu_segcblist_disable(struct rcu_segcblist *rsclp);
|
||||
bool rcu_segcblist_segempty(struct rcu_segcblist *rsclp, int seg);
|
||||
bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp);
|
||||
bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp);
|
||||
struct rcu_head *rcu_segcblist_dequeue(struct rcu_segcblist *rsclp);
|
||||
void rcu_segcblist_dequeued_lazy(struct rcu_segcblist *rsclp);
|
||||
struct rcu_head *rcu_segcblist_first_cb(struct rcu_segcblist *rsclp);
|
||||
struct rcu_head *rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp);
|
||||
bool rcu_segcblist_new_cbs(struct rcu_segcblist *rsclp);
|
||||
void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp,
|
||||
struct rcu_head *rhp, bool lazy);
|
||||
bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp,
|
||||
@@ -162,3 +136,5 @@ void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq);
|
||||
bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq);
|
||||
bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp,
|
||||
unsigned long seq);
|
||||
void rcu_segcblist_merge(struct rcu_segcblist *dst_rsclp,
|
||||
struct rcu_segcblist *src_rsclp);
|
||||
|
@@ -317,8 +317,6 @@ static struct rcu_perf_ops sched_ops = {
|
||||
.name = "sched"
|
||||
};
|
||||
|
||||
#ifdef CONFIG_TASKS_RCU
|
||||
|
||||
/*
|
||||
* Definitions for RCU-tasks perf testing.
|
||||
*/
|
||||
@@ -346,24 +344,11 @@ static struct rcu_perf_ops tasks_ops = {
|
||||
.name = "tasks"
|
||||
};
|
||||
|
||||
#define RCUPERF_TASKS_OPS &tasks_ops,
|
||||
|
||||
static bool __maybe_unused torturing_tasks(void)
|
||||
{
|
||||
return cur_ops == &tasks_ops;
|
||||
}
|
||||
|
||||
#else /* #ifdef CONFIG_TASKS_RCU */
|
||||
|
||||
#define RCUPERF_TASKS_OPS
|
||||
|
||||
static bool __maybe_unused torturing_tasks(void)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
#endif /* #else #ifdef CONFIG_TASKS_RCU */
|
||||
|
||||
/*
|
||||
* If performance tests complete, wait for shutdown to commence.
|
||||
*/
|
||||
@@ -658,7 +643,7 @@ rcu_perf_init(void)
|
||||
int firsterr = 0;
|
||||
static struct rcu_perf_ops *perf_ops[] = {
|
||||
&rcu_ops, &rcu_bh_ops, &srcu_ops, &srcud_ops, &sched_ops,
|
||||
RCUPERF_TASKS_OPS
|
||||
&tasks_ops,
|
||||
};
|
||||
|
||||
if (!torture_init_begin(perf_type, verbose, &perf_runnable))
|
||||
|
@@ -199,7 +199,8 @@ MODULE_PARM_DESC(torture_runnable, "Start rcutorture at boot");
|
||||
static u64 notrace rcu_trace_clock_local(void)
|
||||
{
|
||||
u64 ts = trace_clock_local();
|
||||
unsigned long __maybe_unused ts_rem = do_div(ts, NSEC_PER_USEC);
|
||||
|
||||
(void)do_div(ts, NSEC_PER_USEC);
|
||||
return ts;
|
||||
}
|
||||
#else /* #ifdef CONFIG_RCU_TRACE */
|
||||
@@ -496,7 +497,7 @@ static struct rcu_torture_ops rcu_busted_ops = {
|
||||
.fqs = NULL,
|
||||
.stats = NULL,
|
||||
.irq_capable = 1,
|
||||
.name = "rcu_busted"
|
||||
.name = "busted"
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -522,7 +523,7 @@ static void srcu_read_delay(struct torture_random_state *rrsp)
|
||||
|
||||
delay = torture_random(rrsp) %
|
||||
(nrealreaders * 2 * longdelay * uspertick);
|
||||
if (!delay)
|
||||
if (!delay && in_task())
|
||||
schedule_timeout_interruptible(longdelay);
|
||||
else
|
||||
rcu_read_delay(rrsp);
|
||||
@@ -561,44 +562,7 @@ static void srcu_torture_barrier(void)
|
||||
|
||||
static void srcu_torture_stats(void)
|
||||
{
|
||||
int __maybe_unused cpu;
|
||||
int idx;
|
||||
|
||||
#ifdef CONFIG_TREE_SRCU
|
||||
idx = srcu_ctlp->srcu_idx & 0x1;
|
||||
pr_alert("%s%s Tree SRCU per-CPU(idx=%d):",
|
||||
torture_type, TORTURE_FLAG, idx);
|
||||
for_each_possible_cpu(cpu) {
|
||||
unsigned long l0, l1;
|
||||
unsigned long u0, u1;
|
||||
long c0, c1;
|
||||
struct srcu_data *counts;
|
||||
|
||||
counts = per_cpu_ptr(srcu_ctlp->sda, cpu);
|
||||
u0 = counts->srcu_unlock_count[!idx];
|
||||
u1 = counts->srcu_unlock_count[idx];
|
||||
|
||||
/*
|
||||
* Make sure that a lock is always counted if the corresponding
|
||||
* unlock is counted.
|
||||
*/
|
||||
smp_rmb();
|
||||
|
||||
l0 = counts->srcu_lock_count[!idx];
|
||||
l1 = counts->srcu_lock_count[idx];
|
||||
|
||||
c0 = l0 - u0;
|
||||
c1 = l1 - u1;
|
||||
pr_cont(" %d(%ld,%ld)", cpu, c0, c1);
|
||||
}
|
||||
pr_cont("\n");
|
||||
#elif defined(CONFIG_TINY_SRCU)
|
||||
idx = READ_ONCE(srcu_ctlp->srcu_idx) & 0x1;
|
||||
pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%hd,%hd)\n",
|
||||
torture_type, TORTURE_FLAG, idx,
|
||||
READ_ONCE(srcu_ctlp->srcu_lock_nesting[!idx]),
|
||||
READ_ONCE(srcu_ctlp->srcu_lock_nesting[idx]));
|
||||
#endif
|
||||
srcu_torture_stats_print(srcu_ctlp, torture_type, TORTURE_FLAG);
|
||||
}
|
||||
|
||||
static void srcu_torture_synchronize_expedited(void)
|
||||
@@ -620,6 +584,7 @@ static struct rcu_torture_ops srcu_ops = {
|
||||
.call = srcu_torture_call,
|
||||
.cb_barrier = srcu_torture_barrier,
|
||||
.stats = srcu_torture_stats,
|
||||
.irq_capable = 1,
|
||||
.name = "srcu"
|
||||
};
|
||||
|
||||
@@ -652,6 +617,7 @@ static struct rcu_torture_ops srcud_ops = {
|
||||
.call = srcu_torture_call,
|
||||
.cb_barrier = srcu_torture_barrier,
|
||||
.stats = srcu_torture_stats,
|
||||
.irq_capable = 1,
|
||||
.name = "srcud"
|
||||
};
|
||||
|
||||
@@ -696,8 +662,6 @@ static struct rcu_torture_ops sched_ops = {
|
||||
.name = "sched"
|
||||
};
|
||||
|
||||
#ifdef CONFIG_TASKS_RCU
|
||||
|
||||
/*
|
||||
* Definitions for RCU-tasks torture testing.
|
||||
*/
|
||||
@@ -735,24 +699,11 @@ static struct rcu_torture_ops tasks_ops = {
|
||||
.name = "tasks"
|
||||
};
|
||||
|
||||
#define RCUTORTURE_TASKS_OPS &tasks_ops,
|
||||
|
||||
static bool __maybe_unused torturing_tasks(void)
|
||||
{
|
||||
return cur_ops == &tasks_ops;
|
||||
}
|
||||
|
||||
#else /* #ifdef CONFIG_TASKS_RCU */
|
||||
|
||||
#define RCUTORTURE_TASKS_OPS
|
||||
|
||||
static bool __maybe_unused torturing_tasks(void)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
#endif /* #else #ifdef CONFIG_TASKS_RCU */
|
||||
|
||||
/*
|
||||
* RCU torture priority-boost testing. Runs one real-time thread per
|
||||
* CPU for moderate bursts, repeatedly registering RCU callbacks and
|
||||
@@ -1114,6 +1065,11 @@ rcu_torture_fakewriter(void *arg)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* RCU callback that simply frees the rcu_head it was invoked with. */
static void rcu_torture_timer_cb(struct rcu_head *rhp)
{
	kfree(rhp);
}
|
||||
|
||||
/*
|
||||
* RCU torture reader from timer handler. Dereferences rcu_torture_current,
|
||||
* incrementing the corresponding element of the pipeline array. The
|
||||
@@ -1176,6 +1132,14 @@ static void rcu_torture_timer(unsigned long unused)
|
||||
__this_cpu_inc(rcu_torture_batch[completed]);
|
||||
preempt_enable();
|
||||
cur_ops->readunlock(idx);
|
||||
|
||||
/* Test call_rcu() invocation from interrupt handler. */
|
||||
if (cur_ops->call) {
|
||||
struct rcu_head *rhp = kmalloc(sizeof(*rhp), GFP_NOWAIT);
|
||||
|
||||
if (rhp)
|
||||
cur_ops->call(rhp, rcu_torture_timer_cb);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1354,11 +1318,12 @@ rcu_torture_stats_print(void)
|
||||
srcutorture_get_gp_data(cur_ops->ttype, srcu_ctlp,
|
||||
&flags, &gpnum, &completed);
|
||||
wtp = READ_ONCE(writer_task);
|
||||
pr_alert("??? Writer stall state %s(%d) g%lu c%lu f%#x ->state %#lx\n",
|
||||
pr_alert("??? Writer stall state %s(%d) g%lu c%lu f%#x ->state %#lx cpu %d\n",
|
||||
rcu_torture_writer_state_getname(),
|
||||
rcu_torture_writer_state,
|
||||
gpnum, completed, flags,
|
||||
wtp == NULL ? ~0UL : wtp->state);
|
||||
wtp == NULL ? ~0UL : wtp->state,
|
||||
wtp == NULL ? -1 : (int)task_cpu(wtp));
|
||||
show_rcu_gp_kthreads();
|
||||
rcu_ftrace_dump(DUMP_ALL);
|
||||
}
|
||||
@@ -1749,7 +1714,7 @@ rcu_torture_init(void)
|
||||
int firsterr = 0;
|
||||
static struct rcu_torture_ops *torture_ops[] = {
|
||||
&rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops,
|
||||
&sched_ops, RCUTORTURE_TASKS_OPS
|
||||
&sched_ops, &tasks_ops,
|
||||
};
|
||||
|
||||
if (!torture_init_begin(torture_type, verbose, &torture_runnable))
|
||||
|
@@ -33,6 +33,8 @@
|
||||
#include "rcu_segcblist.h"
|
||||
#include "rcu.h"
|
||||
|
||||
int rcu_scheduler_active __read_mostly;
|
||||
|
||||
static int init_srcu_struct_fields(struct srcu_struct *sp)
|
||||
{
|
||||
sp->srcu_lock_nesting[0] = 0;
|
||||
@@ -193,3 +195,9 @@ void synchronize_srcu(struct srcu_struct *sp)
|
||||
destroy_rcu_head_on_stack(&rs.head);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(synchronize_srcu);
|
||||
|
||||
/* Lockdep diagnostics. */
|
||||
void __init rcu_scheduler_starting(void)
|
||||
{
|
||||
rcu_scheduler_active = RCU_SCHEDULER_RUNNING;
|
||||
}
|
||||
|
@@ -51,6 +51,7 @@ module_param(counter_wrap_check, ulong, 0444);
|
||||
|
||||
static void srcu_invoke_callbacks(struct work_struct *work);
|
||||
static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay);
|
||||
static void process_srcu(struct work_struct *work);
|
||||
|
||||
/*
|
||||
* Initialize SRCU combining tree. Note that statically allocated
|
||||
@@ -896,6 +897,15 @@ static void __synchronize_srcu(struct srcu_struct *sp, bool do_norm)
|
||||
__call_srcu(sp, &rcu.head, wakeme_after_rcu, do_norm);
|
||||
wait_for_completion(&rcu.completion);
|
||||
destroy_rcu_head_on_stack(&rcu.head);
|
||||
|
||||
/*
|
||||
* Make sure that later code is ordered after the SRCU grace
|
||||
* period. This pairs with the raw_spin_lock_irq_rcu_node()
|
||||
* in srcu_invoke_callbacks(). Unlike Tree RCU, this is needed
|
||||
* because the current CPU might have been totally uninvolved with
|
||||
* (and thus unordered against) that grace period.
|
||||
*/
|
||||
smp_mb();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1194,7 +1204,7 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay)
|
||||
/*
|
||||
* This is the work-queue function that handles SRCU grace periods.
|
||||
*/
|
||||
void process_srcu(struct work_struct *work)
|
||||
static void process_srcu(struct work_struct *work)
|
||||
{
|
||||
struct srcu_struct *sp;
|
||||
|
||||
@@ -1203,7 +1213,6 @@ void process_srcu(struct work_struct *work)
|
||||
srcu_advance_state(sp);
|
||||
srcu_reschedule(sp, srcu_get_delay(sp));
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(process_srcu);
|
||||
|
||||
void srcutorture_get_gp_data(enum rcutorture_type test_type,
|
||||
struct srcu_struct *sp, int *flags,
|
||||
@@ -1217,6 +1226,43 @@ void srcutorture_get_gp_data(enum rcutorture_type test_type,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(srcutorture_get_gp_data);
|
||||
|
||||
void srcu_torture_stats_print(struct srcu_struct *sp, char *tt, char *tf)
|
||||
{
|
||||
int cpu;
|
||||
int idx;
|
||||
unsigned long s0 = 0, s1 = 0;
|
||||
|
||||
idx = sp->srcu_idx & 0x1;
|
||||
pr_alert("%s%s Tree SRCU per-CPU(idx=%d):", tt, tf, idx);
|
||||
for_each_possible_cpu(cpu) {
|
||||
unsigned long l0, l1;
|
||||
unsigned long u0, u1;
|
||||
long c0, c1;
|
||||
struct srcu_data *counts;
|
||||
|
||||
counts = per_cpu_ptr(sp->sda, cpu);
|
||||
u0 = counts->srcu_unlock_count[!idx];
|
||||
u1 = counts->srcu_unlock_count[idx];
|
||||
|
||||
/*
|
||||
* Make sure that a lock is always counted if the corresponding
|
||||
* unlock is counted.
|
||||
*/
|
||||
smp_rmb();
|
||||
|
||||
l0 = counts->srcu_lock_count[!idx];
|
||||
l1 = counts->srcu_lock_count[idx];
|
||||
|
||||
c0 = l0 - u0;
|
||||
c1 = l1 - u1;
|
||||
pr_cont(" %d(%ld,%ld)", cpu, c0, c1);
|
||||
s0 += c0;
|
||||
s1 += c1;
|
||||
}
|
||||
pr_cont(" T(%ld,%ld)\n", s0, s1);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(srcu_torture_stats_print);
|
||||
|
||||
static int __init srcu_bootup_announce(void)
|
||||
{
|
||||
pr_info("Hierarchical SRCU implementation.\n");
|
||||
|
@@ -56,8 +56,6 @@ static struct rcu_ctrlblk rcu_bh_ctrlblk = {
|
||||
.curtail = &rcu_bh_ctrlblk.rcucblist,
|
||||
};
|
||||
|
||||
#include "tiny_plugin.h"
|
||||
|
||||
void rcu_barrier_bh(void)
|
||||
{
|
||||
wait_rcu_gp(call_rcu_bh);
|
||||
|
@@ -1,47 +0,0 @@
|
||||
/*
|
||||
* Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition
|
||||
* Internal non-public definitions that provide either classic
|
||||
* or preemptible semantics.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, you can access it online at
|
||||
* http://www.gnu.org/licenses/gpl-2.0.html.
|
||||
*
|
||||
* Copyright (c) 2010 Linaro
|
||||
*
|
||||
* Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
|
||||
*/
|
||||
|
||||
#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU)
|
||||
#include <linux/kernel_stat.h>
|
||||
|
||||
int rcu_scheduler_active __read_mostly;
|
||||
EXPORT_SYMBOL_GPL(rcu_scheduler_active);
|
||||
|
||||
/*
|
||||
* During boot, we forgive RCU lockdep issues. After this function is
|
||||
* invoked, we start taking RCU lockdep issues seriously. Note that unlike
|
||||
* Tree RCU, Tiny RCU transitions directly from RCU_SCHEDULER_INACTIVE
|
||||
* to RCU_SCHEDULER_RUNNING, skipping the RCU_SCHEDULER_INIT stage.
|
||||
* The reason for this is that Tiny RCU does not need kthreads, so does
|
||||
* not have to care about the fact that the scheduler is half-initialized
|
||||
* at a certain phase of the boot process. Unless SRCU is in the mix.
|
||||
*/
|
||||
void __init rcu_scheduler_starting(void)
|
||||
{
|
||||
WARN_ON(nr_context_switches() > 0);
|
||||
rcu_scheduler_active = IS_ENABLED(CONFIG_SRCU)
|
||||
? RCU_SCHEDULER_INIT : RCU_SCHEDULER_RUNNING;
|
||||
}
|
||||
|
||||
#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */
|
@@ -97,9 +97,6 @@ struct rcu_state sname##_state = { \
|
||||
.gp_state = RCU_GP_IDLE, \
|
||||
.gpnum = 0UL - 300UL, \
|
||||
.completed = 0UL - 300UL, \
|
||||
.orphan_lock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.orphan_lock), \
|
||||
.orphan_pend = RCU_CBLIST_INITIALIZER(sname##_state.orphan_pend), \
|
||||
.orphan_done = RCU_CBLIST_INITIALIZER(sname##_state.orphan_done), \
|
||||
.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
|
||||
.name = RCU_STATE_NAME(sname), \
|
||||
.abbr = sabbr, \
|
||||
@@ -843,13 +840,9 @@ static void rcu_eqs_enter(bool user)
|
||||
*/
|
||||
void rcu_idle_enter(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
local_irq_save(flags);
|
||||
RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_idle_enter() invoked with irqs enabled!!!");
|
||||
rcu_eqs_enter(false);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_idle_enter);
|
||||
|
||||
#ifdef CONFIG_NO_HZ_FULL
|
||||
/**
|
||||
@@ -862,7 +855,8 @@ EXPORT_SYMBOL_GPL(rcu_idle_enter);
|
||||
*/
|
||||
void rcu_user_enter(void)
|
||||
{
|
||||
rcu_eqs_enter(1);
|
||||
RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_user_enter() invoked with irqs enabled!!!");
|
||||
rcu_eqs_enter(true);
|
||||
}
|
||||
#endif /* CONFIG_NO_HZ_FULL */
|
||||
|
||||
@@ -955,8 +949,10 @@ static void rcu_eqs_exit(bool user)
|
||||
if (oldval & DYNTICK_TASK_NEST_MASK) {
|
||||
rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
|
||||
} else {
|
||||
__this_cpu_inc(disable_rcu_irq_enter);
|
||||
rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
|
||||
rcu_eqs_exit_common(oldval, user);
|
||||
__this_cpu_dec(disable_rcu_irq_enter);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -979,7 +975,6 @@ void rcu_idle_exit(void)
|
||||
rcu_eqs_exit(false);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_idle_exit);
|
||||
|
||||
#ifdef CONFIG_NO_HZ_FULL
|
||||
/**
|
||||
@@ -1358,12 +1353,13 @@ static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
|
||||
j = jiffies;
|
||||
gpa = READ_ONCE(rsp->gp_activity);
|
||||
if (j - gpa > 2 * HZ) {
|
||||
pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x %s(%d) ->state=%#lx\n",
|
||||
pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x %s(%d) ->state=%#lx ->cpu=%d\n",
|
||||
rsp->name, j - gpa,
|
||||
rsp->gpnum, rsp->completed,
|
||||
rsp->gp_flags,
|
||||
gp_state_getname(rsp->gp_state), rsp->gp_state,
|
||||
rsp->gp_kthread ? rsp->gp_kthread->state : ~0);
|
||||
rsp->gp_kthread ? rsp->gp_kthread->state : ~0,
|
||||
rsp->gp_kthread ? task_cpu(rsp->gp_kthread) : -1);
|
||||
if (rsp->gp_kthread) {
|
||||
sched_show_task(rsp->gp_kthread);
|
||||
wake_up_process(rsp->gp_kthread);
|
||||
@@ -2067,8 +2063,8 @@ static bool rcu_gp_init(struct rcu_state *rsp)
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper function for wait_event_interruptible_timeout() wakeup
|
||||
* at force-quiescent-state time.
|
||||
* Helper function for swait_event_idle() wakeup at force-quiescent-state
|
||||
* time.
|
||||
*/
|
||||
static bool rcu_gp_fqs_check_wake(struct rcu_state *rsp, int *gfp)
|
||||
{
|
||||
@@ -2206,9 +2202,8 @@ static int __noreturn rcu_gp_kthread(void *arg)
|
||||
READ_ONCE(rsp->gpnum),
|
||||
TPS("reqwait"));
|
||||
rsp->gp_state = RCU_GP_WAIT_GPS;
|
||||
swait_event_interruptible(rsp->gp_wq,
|
||||
READ_ONCE(rsp->gp_flags) &
|
||||
RCU_GP_FLAG_INIT);
|
||||
swait_event_idle(rsp->gp_wq, READ_ONCE(rsp->gp_flags) &
|
||||
RCU_GP_FLAG_INIT);
|
||||
rsp->gp_state = RCU_GP_DONE_GPS;
|
||||
/* Locking provides needed memory barrier. */
|
||||
if (rcu_gp_init(rsp))
|
||||
@@ -2239,7 +2234,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
|
||||
READ_ONCE(rsp->gpnum),
|
||||
TPS("fqswait"));
|
||||
rsp->gp_state = RCU_GP_WAIT_FQS;
|
||||
ret = swait_event_interruptible_timeout(rsp->gp_wq,
|
||||
ret = swait_event_idle_timeout(rsp->gp_wq,
|
||||
rcu_gp_fqs_check_wake(rsp, &gf), j);
|
||||
rsp->gp_state = RCU_GP_DOING_FQS;
|
||||
/* Locking provides needed memory barriers. */
|
||||
@@ -2409,6 +2404,8 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
|
||||
return;
|
||||
}
|
||||
WARN_ON_ONCE(oldmask); /* Any child must be all zeroed! */
|
||||
WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1 &&
|
||||
rcu_preempt_blocked_readers_cgp(rnp));
|
||||
rnp->qsmask &= ~mask;
|
||||
trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum,
|
||||
mask, rnp->qsmask, rnp->level,
|
||||
@@ -2562,85 +2559,6 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
|
||||
rcu_report_qs_rdp(rdp->cpu, rsp, rdp);
|
||||
}
|
||||
|
||||
/*
|
||||
* Send the specified CPU's RCU callbacks to the orphanage. The
|
||||
* specified CPU must be offline, and the caller must hold the
|
||||
* ->orphan_lock.
|
||||
*/
|
||||
static void
|
||||
rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
|
||||
struct rcu_node *rnp, struct rcu_data *rdp)
|
||||
{
|
||||
lockdep_assert_held(&rsp->orphan_lock);
|
||||
|
||||
/* No-CBs CPUs do not have orphanable callbacks. */
|
||||
if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) || rcu_is_nocb_cpu(rdp->cpu))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Orphan the callbacks. First adjust the counts. This is safe
|
||||
* because _rcu_barrier() excludes CPU-hotplug operations, so it
|
||||
* cannot be running now. Thus no memory barrier is required.
|
||||
*/
|
||||
rdp->n_cbs_orphaned += rcu_segcblist_n_cbs(&rdp->cblist);
|
||||
rcu_segcblist_extract_count(&rdp->cblist, &rsp->orphan_done);
|
||||
|
||||
/*
|
||||
* Next, move those callbacks still needing a grace period to
|
||||
* the orphanage, where some other CPU will pick them up.
|
||||
* Some of the callbacks might have gone partway through a grace
|
||||
* period, but that is too bad. They get to start over because we
|
||||
* cannot assume that grace periods are synchronized across CPUs.
|
||||
*/
|
||||
rcu_segcblist_extract_pend_cbs(&rdp->cblist, &rsp->orphan_pend);
|
||||
|
||||
/*
|
||||
* Then move the ready-to-invoke callbacks to the orphanage,
|
||||
* where some other CPU will pick them up. These will not be
|
||||
* required to pass though another grace period: They are done.
|
||||
*/
|
||||
rcu_segcblist_extract_done_cbs(&rdp->cblist, &rsp->orphan_done);
|
||||
|
||||
/* Finally, disallow further callbacks on this CPU. */
|
||||
rcu_segcblist_disable(&rdp->cblist);
|
||||
}
|
||||
|
||||
/*
|
||||
* Adopt the RCU callbacks from the specified rcu_state structure's
|
||||
* orphanage. The caller must hold the ->orphan_lock.
|
||||
*/
|
||||
static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
|
||||
{
|
||||
struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
|
||||
|
||||
lockdep_assert_held(&rsp->orphan_lock);
|
||||
|
||||
/* No-CBs CPUs are handled specially. */
|
||||
if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) ||
|
||||
rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags))
|
||||
return;
|
||||
|
||||
/* Do the accounting first. */
|
||||
rdp->n_cbs_adopted += rsp->orphan_done.len;
|
||||
if (rsp->orphan_done.len_lazy != rsp->orphan_done.len)
|
||||
rcu_idle_count_callbacks_posted();
|
||||
rcu_segcblist_insert_count(&rdp->cblist, &rsp->orphan_done);
|
||||
|
||||
/*
|
||||
* We do not need a memory barrier here because the only way we
|
||||
* can get here if there is an rcu_barrier() in flight is if
|
||||
* we are the task doing the rcu_barrier().
|
||||
*/
|
||||
|
||||
/* First adopt the ready-to-invoke callbacks, then the done ones. */
|
||||
rcu_segcblist_insert_done_cbs(&rdp->cblist, &rsp->orphan_done);
|
||||
WARN_ON_ONCE(rsp->orphan_done.head);
|
||||
rcu_segcblist_insert_pend_cbs(&rdp->cblist, &rsp->orphan_pend);
|
||||
WARN_ON_ONCE(rsp->orphan_pend.head);
|
||||
WARN_ON_ONCE(rcu_segcblist_empty(&rdp->cblist) !=
|
||||
!rcu_segcblist_n_cbs(&rdp->cblist));
|
||||
}
|
||||
|
||||
/*
|
||||
* Trace the fact that this CPU is going offline.
|
||||
*/
|
||||
@@ -2704,14 +2622,12 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
|
||||
|
||||
/*
|
||||
* The CPU has been completely removed, and some other CPU is reporting
|
||||
* this fact from process context. Do the remainder of the cleanup,
|
||||
* including orphaning the outgoing CPU's RCU callbacks, and also
|
||||
* adopting them. There can only be one CPU hotplug operation at a time,
|
||||
* so no other CPU can be attempting to update rcu_cpu_kthread_task.
|
||||
* this fact from process context. Do the remainder of the cleanup.
|
||||
* There can only be one CPU hotplug operation at a time, so no need for
|
||||
* explicit locking.
|
||||
*/
|
||||
static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
|
||||
struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */
|
||||
|
||||
@@ -2720,18 +2636,6 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
|
||||
|
||||
/* Adjust any no-longer-needed kthreads. */
|
||||
rcu_boost_kthread_setaffinity(rnp, -1);
|
||||
|
||||
/* Orphan the dead CPU's callbacks, and adopt them if appropriate. */
|
||||
raw_spin_lock_irqsave(&rsp->orphan_lock, flags);
|
||||
rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp);
|
||||
rcu_adopt_orphan_cbs(rsp, flags);
|
||||
raw_spin_unlock_irqrestore(&rsp->orphan_lock, flags);
|
||||
|
||||
WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 ||
|
||||
!rcu_segcblist_empty(&rdp->cblist),
|
||||
"rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n",
|
||||
cpu, rcu_segcblist_n_cbs(&rdp->cblist),
|
||||
rcu_segcblist_first_cb(&rdp->cblist));
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -3569,10 +3473,11 @@ static void rcu_barrier_callback(struct rcu_head *rhp)
|
||||
struct rcu_state *rsp = rdp->rsp;
|
||||
|
||||
if (atomic_dec_and_test(&rsp->barrier_cpu_count)) {
|
||||
_rcu_barrier_trace(rsp, "LastCB", -1, rsp->barrier_sequence);
|
||||
_rcu_barrier_trace(rsp, TPS("LastCB"), -1,
|
||||
rsp->barrier_sequence);
|
||||
complete(&rsp->barrier_completion);
|
||||
} else {
|
||||
_rcu_barrier_trace(rsp, "CB", -1, rsp->barrier_sequence);
|
||||
_rcu_barrier_trace(rsp, TPS("CB"), -1, rsp->barrier_sequence);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3584,14 +3489,15 @@ static void rcu_barrier_func(void *type)
|
||||
struct rcu_state *rsp = type;
|
||||
struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
|
||||
|
||||
_rcu_barrier_trace(rsp, "IRQ", -1, rsp->barrier_sequence);
|
||||
_rcu_barrier_trace(rsp, TPS("IRQ"), -1, rsp->barrier_sequence);
|
||||
rdp->barrier_head.func = rcu_barrier_callback;
|
||||
debug_rcu_head_queue(&rdp->barrier_head);
|
||||
if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head, 0)) {
|
||||
atomic_inc(&rsp->barrier_cpu_count);
|
||||
} else {
|
||||
debug_rcu_head_unqueue(&rdp->barrier_head);
|
||||
_rcu_barrier_trace(rsp, "IRQNQ", -1, rsp->barrier_sequence);
|
||||
_rcu_barrier_trace(rsp, TPS("IRQNQ"), -1,
|
||||
rsp->barrier_sequence);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3605,14 +3511,15 @@ static void _rcu_barrier(struct rcu_state *rsp)
|
||||
struct rcu_data *rdp;
|
||||
unsigned long s = rcu_seq_snap(&rsp->barrier_sequence);
|
||||
|
||||
_rcu_barrier_trace(rsp, "Begin", -1, s);
|
||||
_rcu_barrier_trace(rsp, TPS("Begin"), -1, s);
|
||||
|
||||
/* Take mutex to serialize concurrent rcu_barrier() requests. */
|
||||
mutex_lock(&rsp->barrier_mutex);
|
||||
|
||||
/* Did someone else do our work for us? */
|
||||
if (rcu_seq_done(&rsp->barrier_sequence, s)) {
|
||||
_rcu_barrier_trace(rsp, "EarlyExit", -1, rsp->barrier_sequence);
|
||||
_rcu_barrier_trace(rsp, TPS("EarlyExit"), -1,
|
||||
rsp->barrier_sequence);
|
||||
smp_mb(); /* caller's subsequent code after above check. */
|
||||
mutex_unlock(&rsp->barrier_mutex);
|
||||
return;
|
||||
@@ -3620,7 +3527,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
|
||||
|
||||
/* Mark the start of the barrier operation. */
|
||||
rcu_seq_start(&rsp->barrier_sequence);
|
||||
_rcu_barrier_trace(rsp, "Inc1", -1, rsp->barrier_sequence);
|
||||
_rcu_barrier_trace(rsp, TPS("Inc1"), -1, rsp->barrier_sequence);
|
||||
|
||||
/*
|
||||
* Initialize the count to one rather than to zero in order to
|
||||
@@ -3643,10 +3550,10 @@ static void _rcu_barrier(struct rcu_state *rsp)
|
||||
rdp = per_cpu_ptr(rsp->rda, cpu);
|
||||
if (rcu_is_nocb_cpu(cpu)) {
|
||||
if (!rcu_nocb_cpu_needs_barrier(rsp, cpu)) {
|
||||
_rcu_barrier_trace(rsp, "OfflineNoCB", cpu,
|
||||
_rcu_barrier_trace(rsp, TPS("OfflineNoCB"), cpu,
|
||||
rsp->barrier_sequence);
|
||||
} else {
|
||||
_rcu_barrier_trace(rsp, "OnlineNoCB", cpu,
|
||||
_rcu_barrier_trace(rsp, TPS("OnlineNoCB"), cpu,
|
||||
rsp->barrier_sequence);
|
||||
smp_mb__before_atomic();
|
||||
atomic_inc(&rsp->barrier_cpu_count);
|
||||
@@ -3654,11 +3561,11 @@ static void _rcu_barrier(struct rcu_state *rsp)
|
||||
rcu_barrier_callback, rsp, cpu, 0);
|
||||
}
|
||||
} else if (rcu_segcblist_n_cbs(&rdp->cblist)) {
|
||||
_rcu_barrier_trace(rsp, "OnlineQ", cpu,
|
||||
_rcu_barrier_trace(rsp, TPS("OnlineQ"), cpu,
|
||||
rsp->barrier_sequence);
|
||||
smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);
|
||||
} else {
|
||||
_rcu_barrier_trace(rsp, "OnlineNQ", cpu,
|
||||
_rcu_barrier_trace(rsp, TPS("OnlineNQ"), cpu,
|
||||
rsp->barrier_sequence);
|
||||
}
|
||||
}
|
||||
@@ -3675,7 +3582,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
|
||||
wait_for_completion(&rsp->barrier_completion);
|
||||
|
||||
/* Mark the end of the barrier operation. */
|
||||
_rcu_barrier_trace(rsp, "Inc2", -1, rsp->barrier_sequence);
|
||||
_rcu_barrier_trace(rsp, TPS("Inc2"), -1, rsp->barrier_sequence);
|
||||
rcu_seq_end(&rsp->barrier_sequence);
|
||||
|
||||
/* Other rcu_barrier() invocations can now safely proceed. */
|
||||
@@ -3777,8 +3684,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
|
||||
*/
|
||||
rnp = rdp->mynode;
|
||||
raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
|
||||
if (!rdp->beenonline)
|
||||
WRITE_ONCE(rsp->ncpus, READ_ONCE(rsp->ncpus) + 1);
|
||||
rdp->beenonline = true; /* We have now been online. */
|
||||
rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */
|
||||
rdp->completed = rnp->completed;
|
||||
@@ -3882,6 +3787,8 @@ void rcu_cpu_starting(unsigned int cpu)
|
||||
{
|
||||
unsigned long flags;
|
||||
unsigned long mask;
|
||||
int nbits;
|
||||
unsigned long oldmask;
|
||||
struct rcu_data *rdp;
|
||||
struct rcu_node *rnp;
|
||||
struct rcu_state *rsp;
|
||||
@@ -3892,9 +3799,15 @@ void rcu_cpu_starting(unsigned int cpu)
|
||||
mask = rdp->grpmask;
|
||||
raw_spin_lock_irqsave_rcu_node(rnp, flags);
|
||||
rnp->qsmaskinitnext |= mask;
|
||||
oldmask = rnp->expmaskinitnext;
|
||||
rnp->expmaskinitnext |= mask;
|
||||
oldmask ^= rnp->expmaskinitnext;
|
||||
nbits = bitmap_weight(&oldmask, BITS_PER_LONG);
|
||||
/* Allow lockless access for expedited grace periods. */
|
||||
smp_store_release(&rsp->ncpus, rsp->ncpus + nbits); /* ^^^ */
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
}
|
||||
smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
@@ -3937,6 +3850,50 @@ void rcu_report_dead(unsigned int cpu)
|
||||
for_each_rcu_flavor(rsp)
|
||||
rcu_cleanup_dying_idle_cpu(cpu, rsp);
|
||||
}
|
||||
|
||||
/* Migrate the dead CPU's callbacks to the current CPU. */
|
||||
static void rcu_migrate_callbacks(int cpu, struct rcu_state *rsp)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct rcu_data *my_rdp;
|
||||
struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
|
||||
struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
|
||||
|
||||
if (rcu_is_nocb_cpu(cpu) || rcu_segcblist_empty(&rdp->cblist))
|
||||
return; /* No callbacks to migrate. */
|
||||
|
||||
local_irq_save(flags);
|
||||
my_rdp = this_cpu_ptr(rsp->rda);
|
||||
if (rcu_nocb_adopt_orphan_cbs(my_rdp, rdp, flags)) {
|
||||
local_irq_restore(flags);
|
||||
return;
|
||||
}
|
||||
raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */
|
||||
rcu_advance_cbs(rsp, rnp_root, rdp); /* Leverage recent GPs. */
|
||||
rcu_advance_cbs(rsp, rnp_root, my_rdp); /* Assign GP to pending CBs. */
|
||||
rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist);
|
||||
WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) !=
|
||||
!rcu_segcblist_n_cbs(&my_rdp->cblist));
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp_root, flags);
|
||||
WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 ||
|
||||
!rcu_segcblist_empty(&rdp->cblist),
|
||||
"rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n",
|
||||
cpu, rcu_segcblist_n_cbs(&rdp->cblist),
|
||||
rcu_segcblist_first_cb(&rdp->cblist));
|
||||
}
|
||||
|
||||
/*
|
||||
* The outgoing CPU has just passed through the dying-idle state,
|
||||
* and we are being invoked from the CPU that was IPIed to continue the
|
||||
* offline operation. We need to migrate the outgoing CPU's callbacks.
|
||||
*/
|
||||
void rcutree_migrate_callbacks(int cpu)
|
||||
{
|
||||
struct rcu_state *rsp;
|
||||
|
||||
for_each_rcu_flavor(rsp)
|
||||
rcu_migrate_callbacks(cpu, rsp);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
@@ -219,8 +219,6 @@ struct rcu_data {
|
||||
/* qlen at last check for QS forcing */
|
||||
unsigned long n_cbs_invoked; /* count of RCU cbs invoked. */
|
||||
unsigned long n_nocbs_invoked; /* count of no-CBs RCU cbs invoked. */
|
||||
unsigned long n_cbs_orphaned; /* RCU cbs orphaned by dying CPU */
|
||||
unsigned long n_cbs_adopted; /* RCU cbs adopted from dying CPU */
|
||||
unsigned long n_force_qs_snap;
|
||||
/* did other CPU force QS recently? */
|
||||
long blimit; /* Upper limit on a processed batch */
|
||||
@@ -268,7 +266,9 @@ struct rcu_data {
|
||||
struct rcu_head **nocb_follower_tail;
|
||||
struct swait_queue_head nocb_wq; /* For nocb kthreads to sleep on. */
|
||||
struct task_struct *nocb_kthread;
|
||||
raw_spinlock_t nocb_lock; /* Guard following pair of fields. */
|
||||
int nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */
|
||||
struct timer_list nocb_timer; /* Enforce finite deferral. */
|
||||
|
||||
/* The following fields are used by the leader, hence own cacheline. */
|
||||
struct rcu_head *nocb_gp_head ____cacheline_internodealigned_in_smp;
|
||||
@@ -350,15 +350,6 @@ struct rcu_state {
|
||||
|
||||
/* End of fields guarded by root rcu_node's lock. */
|
||||
|
||||
raw_spinlock_t orphan_lock ____cacheline_internodealigned_in_smp;
|
||||
/* Protect following fields. */
|
||||
struct rcu_cblist orphan_pend; /* Orphaned callbacks that */
|
||||
/* need a grace period. */
|
||||
struct rcu_cblist orphan_done; /* Orphaned callbacks that */
|
||||
/* are ready to invoke. */
|
||||
/* (Contains counts.) */
|
||||
/* End of fields guarded by orphan_lock. */
|
||||
|
||||
struct mutex barrier_mutex; /* Guards barrier fields. */
|
||||
atomic_t barrier_cpu_count; /* # CPUs waiting on. */
|
||||
struct completion barrier_completion; /* Wake at barrier end. */
|
||||
@@ -495,7 +486,7 @@ static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq);
|
||||
static void rcu_init_one_nocb(struct rcu_node *rnp);
|
||||
static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
|
||||
bool lazy, unsigned long flags);
|
||||
static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
|
||||
static bool rcu_nocb_adopt_orphan_cbs(struct rcu_data *my_rdp,
|
||||
struct rcu_data *rdp,
|
||||
unsigned long flags);
|
||||
static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp);
|
||||
|
@@ -73,7 +73,7 @@ static void sync_exp_reset_tree_hotplug(struct rcu_state *rsp)
|
||||
unsigned long flags;
|
||||
unsigned long mask;
|
||||
unsigned long oldmask;
|
||||
int ncpus = READ_ONCE(rsp->ncpus);
|
||||
int ncpus = smp_load_acquire(&rsp->ncpus); /* Order against locking. */
|
||||
struct rcu_node *rnp;
|
||||
struct rcu_node *rnp_up;
|
||||
|
||||
|
@@ -180,6 +180,8 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
|
||||
struct task_struct *t = current;
|
||||
|
||||
lockdep_assert_held(&rnp->lock);
|
||||
WARN_ON_ONCE(rdp->mynode != rnp);
|
||||
WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1);
|
||||
|
||||
/*
|
||||
* Decide where to queue the newly blocked task. In theory,
|
||||
@@ -261,6 +263,10 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
|
||||
rnp->gp_tasks = &t->rcu_node_entry;
|
||||
if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD))
|
||||
rnp->exp_tasks = &t->rcu_node_entry;
|
||||
WARN_ON_ONCE(!(blkd_state & RCU_GP_BLKD) !=
|
||||
!(rnp->qsmask & rdp->grpmask));
|
||||
WARN_ON_ONCE(!(blkd_state & RCU_EXP_BLKD) !=
|
||||
!(rnp->expmask & rdp->grpmask));
|
||||
raw_spin_unlock_rcu_node(rnp); /* interrupts remain disabled. */
|
||||
|
||||
/*
|
||||
@@ -482,6 +488,7 @@ void rcu_read_unlock_special(struct task_struct *t)
|
||||
rnp = t->rcu_blocked_node;
|
||||
raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
|
||||
WARN_ON_ONCE(rnp != t->rcu_blocked_node);
|
||||
WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1);
|
||||
empty_norm = !rcu_preempt_blocked_readers_cgp(rnp);
|
||||
empty_exp = sync_rcu_preempt_exp_done(rnp);
|
||||
smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
|
||||
@@ -495,10 +502,10 @@ void rcu_read_unlock_special(struct task_struct *t)
|
||||
if (&t->rcu_node_entry == rnp->exp_tasks)
|
||||
rnp->exp_tasks = np;
|
||||
if (IS_ENABLED(CONFIG_RCU_BOOST)) {
|
||||
if (&t->rcu_node_entry == rnp->boost_tasks)
|
||||
rnp->boost_tasks = np;
|
||||
/* Snapshot ->boost_mtx ownership w/rnp->lock held. */
|
||||
drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t;
|
||||
if (&t->rcu_node_entry == rnp->boost_tasks)
|
||||
rnp->boost_tasks = np;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -636,10 +643,17 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
|
||||
*/
|
||||
static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
|
||||
{
|
||||
struct task_struct *t;
|
||||
|
||||
RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_check_blocked_tasks() invoked with preemption enabled!!!\n");
|
||||
WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp));
|
||||
if (rcu_preempt_has_tasks(rnp))
|
||||
if (rcu_preempt_has_tasks(rnp)) {
|
||||
rnp->gp_tasks = rnp->blkd_tasks.next;
|
||||
t = container_of(rnp->gp_tasks, struct task_struct,
|
||||
rcu_node_entry);
|
||||
trace_rcu_unlock_preempted_task(TPS("rcu_preempt-GPS"),
|
||||
rnp->gpnum, t->pid);
|
||||
}
|
||||
WARN_ON_ONCE(rnp->qsmask);
|
||||
}
|
||||
|
||||
@@ -1788,22 +1802,61 @@ bool rcu_is_nocb_cpu(int cpu)
|
||||
}
|
||||
|
||||
/*
|
||||
* Kick the leader kthread for this NOCB group.
|
||||
* Kick the leader kthread for this NOCB group. Caller holds ->nocb_lock
|
||||
* and this function releases it.
|
||||
*/
|
||||
static void wake_nocb_leader(struct rcu_data *rdp, bool force)
|
||||
static void __wake_nocb_leader(struct rcu_data *rdp, bool force,
|
||||
unsigned long flags)
|
||||
__releases(rdp->nocb_lock)
|
||||
{
|
||||
struct rcu_data *rdp_leader = rdp->nocb_leader;
|
||||
|
||||
if (!READ_ONCE(rdp_leader->nocb_kthread))
|
||||
lockdep_assert_held(&rdp->nocb_lock);
|
||||
if (!READ_ONCE(rdp_leader->nocb_kthread)) {
|
||||
raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
|
||||
return;
|
||||
if (READ_ONCE(rdp_leader->nocb_leader_sleep) || force) {
|
||||
}
|
||||
if (rdp_leader->nocb_leader_sleep || force) {
|
||||
/* Prior smp_mb__after_atomic() orders against prior enqueue. */
|
||||
WRITE_ONCE(rdp_leader->nocb_leader_sleep, false);
|
||||
del_timer(&rdp->nocb_timer);
|
||||
raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
|
||||
smp_mb(); /* ->nocb_leader_sleep before swake_up(). */
|
||||
swake_up(&rdp_leader->nocb_wq);
|
||||
} else {
|
||||
raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Kick the leader kthread for this NOCB group, but caller has not
|
||||
* acquired locks.
|
||||
*/
|
||||
static void wake_nocb_leader(struct rcu_data *rdp, bool force)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
|
||||
__wake_nocb_leader(rdp, force, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Arrange to wake the leader kthread for this NOCB group at some
|
||||
* future time when it is safe to do so.
|
||||
*/
|
||||
static void wake_nocb_leader_defer(struct rcu_data *rdp, int waketype,
|
||||
const char *reason)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
|
||||
if (rdp->nocb_defer_wakeup == RCU_NOCB_WAKE_NOT)
|
||||
mod_timer(&rdp->nocb_timer, jiffies + 1);
|
||||
WRITE_ONCE(rdp->nocb_defer_wakeup, waketype);
|
||||
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, reason);
|
||||
raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Does the specified CPU need an RCU callback for the specified flavor
|
||||
* of rcu_barrier()?
|
||||
@@ -1891,11 +1944,8 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
|
||||
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
|
||||
TPS("WakeEmpty"));
|
||||
} else {
|
||||
WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE);
|
||||
/* Store ->nocb_defer_wakeup before ->rcu_urgent_qs. */
|
||||
smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true);
|
||||
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
|
||||
TPS("WakeEmptyIsDeferred"));
|
||||
wake_nocb_leader_defer(rdp, RCU_NOCB_WAKE,
|
||||
TPS("WakeEmptyIsDeferred"));
|
||||
}
|
||||
rdp->qlen_last_fqs_check = 0;
|
||||
} else if (len > rdp->qlen_last_fqs_check + qhimark) {
|
||||
@@ -1905,11 +1955,8 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
|
||||
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
|
||||
TPS("WakeOvf"));
|
||||
} else {
|
||||
WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_FORCE);
|
||||
/* Store ->nocb_defer_wakeup before ->rcu_urgent_qs. */
|
||||
smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true);
|
||||
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
|
||||
TPS("WakeOvfIsDeferred"));
|
||||
wake_nocb_leader_defer(rdp, RCU_NOCB_WAKE,
|
||||
TPS("WakeOvfIsDeferred"));
|
||||
}
|
||||
rdp->qlen_last_fqs_check = LONG_MAX / 2;
|
||||
} else {
|
||||
@@ -1961,30 +2008,19 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
|
||||
* Adopt orphaned callbacks on a no-CBs CPU, or return 0 if this is
|
||||
* not a no-CBs CPU.
|
||||
*/
|
||||
static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
|
||||
static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_data *my_rdp,
|
||||
struct rcu_data *rdp,
|
||||
unsigned long flags)
|
||||
{
|
||||
long ql = rsp->orphan_done.len;
|
||||
long qll = rsp->orphan_done.len_lazy;
|
||||
|
||||
/* If this is not a no-CBs CPU, tell the caller to do it the old way. */
|
||||
RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_nocb_adopt_orphan_cbs() invoked with irqs enabled!!!");
|
||||
if (!rcu_is_nocb_cpu(smp_processor_id()))
|
||||
return false;
|
||||
|
||||
/* First, enqueue the donelist, if any. This preserves CB ordering. */
|
||||
if (rsp->orphan_done.head) {
|
||||
__call_rcu_nocb_enqueue(rdp, rcu_cblist_head(&rsp->orphan_done),
|
||||
rcu_cblist_tail(&rsp->orphan_done),
|
||||
ql, qll, flags);
|
||||
}
|
||||
if (rsp->orphan_pend.head) {
|
||||
__call_rcu_nocb_enqueue(rdp, rcu_cblist_head(&rsp->orphan_pend),
|
||||
rcu_cblist_tail(&rsp->orphan_pend),
|
||||
ql, qll, flags);
|
||||
}
|
||||
rcu_cblist_init(&rsp->orphan_done);
|
||||
rcu_cblist_init(&rsp->orphan_pend);
|
||||
return false; /* Not NOCBs CPU, caller must migrate CBs. */
|
||||
__call_rcu_nocb_enqueue(my_rdp, rcu_segcblist_head(&rdp->cblist),
|
||||
rcu_segcblist_tail(&rdp->cblist),
|
||||
rcu_segcblist_n_cbs(&rdp->cblist),
|
||||
rcu_segcblist_n_lazy_cbs(&rdp->cblist), flags);
|
||||
rcu_segcblist_init(&rdp->cblist);
|
||||
rcu_segcblist_disable(&rdp->cblist);
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -2031,6 +2067,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
|
||||
static void nocb_leader_wait(struct rcu_data *my_rdp)
|
||||
{
|
||||
bool firsttime = true;
|
||||
unsigned long flags;
|
||||
bool gotcbs;
|
||||
struct rcu_data *rdp;
|
||||
struct rcu_head **tail;
|
||||
@@ -2039,13 +2076,17 @@ wait_again:
|
||||
|
||||
/* Wait for callbacks to appear. */
|
||||
if (!rcu_nocb_poll) {
|
||||
trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, "Sleep");
|
||||
trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, TPS("Sleep"));
|
||||
swait_event_interruptible(my_rdp->nocb_wq,
|
||||
!READ_ONCE(my_rdp->nocb_leader_sleep));
|
||||
/* Memory barrier handled by smp_mb() calls below and repoll. */
|
||||
raw_spin_lock_irqsave(&my_rdp->nocb_lock, flags);
|
||||
my_rdp->nocb_leader_sleep = true;
|
||||
WRITE_ONCE(my_rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
|
||||
del_timer(&my_rdp->nocb_timer);
|
||||
raw_spin_unlock_irqrestore(&my_rdp->nocb_lock, flags);
|
||||
} else if (firsttime) {
|
||||
firsttime = false; /* Don't drown trace log with "Poll"! */
|
||||
trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, "Poll");
|
||||
trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, TPS("Poll"));
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2054,7 +2095,7 @@ wait_again:
|
||||
* nocb_gp_head, where they await a grace period.
|
||||
*/
|
||||
gotcbs = false;
|
||||
smp_mb(); /* wakeup before ->nocb_head reads. */
|
||||
smp_mb(); /* wakeup and _sleep before ->nocb_head reads. */
|
||||
for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) {
|
||||
rdp->nocb_gp_head = READ_ONCE(rdp->nocb_head);
|
||||
if (!rdp->nocb_gp_head)
|
||||
@@ -2066,56 +2107,41 @@ wait_again:
|
||||
gotcbs = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* If there were no callbacks, sleep a bit, rescan after a
|
||||
* memory barrier, and go retry.
|
||||
*/
|
||||
/* No callbacks? Sleep a bit if polling, and go retry. */
|
||||
if (unlikely(!gotcbs)) {
|
||||
if (!rcu_nocb_poll)
|
||||
trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu,
|
||||
"WokeEmpty");
|
||||
WARN_ON(signal_pending(current));
|
||||
schedule_timeout_interruptible(1);
|
||||
|
||||
/* Rescan in case we were a victim of memory ordering. */
|
||||
my_rdp->nocb_leader_sleep = true;
|
||||
smp_mb(); /* Ensure _sleep true before scan. */
|
||||
for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower)
|
||||
if (READ_ONCE(rdp->nocb_head)) {
|
||||
/* Found CB, so short-circuit next wait. */
|
||||
my_rdp->nocb_leader_sleep = false;
|
||||
break;
|
||||
}
|
||||
if (rcu_nocb_poll) {
|
||||
schedule_timeout_interruptible(1);
|
||||
} else {
|
||||
trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu,
|
||||
TPS("WokeEmpty"));
|
||||
}
|
||||
goto wait_again;
|
||||
}
|
||||
|
||||
/* Wait for one grace period. */
|
||||
rcu_nocb_wait_gp(my_rdp);
|
||||
|
||||
/*
|
||||
* We left ->nocb_leader_sleep unset to reduce cache thrashing.
|
||||
* We set it now, but recheck for new callbacks while
|
||||
* traversing our follower list.
|
||||
*/
|
||||
my_rdp->nocb_leader_sleep = true;
|
||||
smp_mb(); /* Ensure _sleep true before scan of ->nocb_head. */
|
||||
|
||||
/* Each pass through the following loop wakes a follower, if needed. */
|
||||
for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) {
|
||||
if (READ_ONCE(rdp->nocb_head))
|
||||
if (!rcu_nocb_poll &&
|
||||
READ_ONCE(rdp->nocb_head) &&
|
||||
READ_ONCE(my_rdp->nocb_leader_sleep)) {
|
||||
raw_spin_lock_irqsave(&my_rdp->nocb_lock, flags);
|
||||
my_rdp->nocb_leader_sleep = false;/* No need to sleep.*/
|
||||
raw_spin_unlock_irqrestore(&my_rdp->nocb_lock, flags);
|
||||
}
|
||||
if (!rdp->nocb_gp_head)
|
||||
continue; /* No CBs, so no need to wake follower. */
|
||||
|
||||
/* Append callbacks to follower's "done" list. */
|
||||
tail = xchg(&rdp->nocb_follower_tail, rdp->nocb_gp_tail);
|
||||
raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
|
||||
tail = rdp->nocb_follower_tail;
|
||||
rdp->nocb_follower_tail = rdp->nocb_gp_tail;
|
||||
*tail = rdp->nocb_gp_head;
|
||||
smp_mb__after_atomic(); /* Store *tail before wakeup. */
|
||||
raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
|
||||
if (rdp != my_rdp && tail == &rdp->nocb_follower_head) {
|
||||
/*
|
||||
* List was empty, wake up the follower.
|
||||
* Memory barriers supplied by atomic_long_add().
|
||||
*/
|
||||
/* List was empty, so wake up the follower. */
|
||||
swake_up(&rdp->nocb_wq);
|
||||
}
|
||||
}
|
||||
@@ -2131,28 +2157,16 @@ wait_again:
|
||||
*/
|
||||
static void nocb_follower_wait(struct rcu_data *rdp)
|
||||
{
|
||||
bool firsttime = true;
|
||||
|
||||
for (;;) {
|
||||
if (!rcu_nocb_poll) {
|
||||
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
|
||||
"FollowerSleep");
|
||||
swait_event_interruptible(rdp->nocb_wq,
|
||||
READ_ONCE(rdp->nocb_follower_head));
|
||||
} else if (firsttime) {
|
||||
/* Don't drown trace log with "Poll"! */
|
||||
firsttime = false;
|
||||
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, "Poll");
|
||||
}
|
||||
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("FollowerSleep"));
|
||||
swait_event_interruptible(rdp->nocb_wq,
|
||||
READ_ONCE(rdp->nocb_follower_head));
|
||||
if (smp_load_acquire(&rdp->nocb_follower_head)) {
|
||||
/* ^^^ Ensure CB invocation follows _head test. */
|
||||
return;
|
||||
}
|
||||
if (!rcu_nocb_poll)
|
||||
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
|
||||
"WokeEmpty");
|
||||
WARN_ON(signal_pending(current));
|
||||
schedule_timeout_interruptible(1);
|
||||
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WokeEmpty"));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2165,6 +2179,7 @@ static void nocb_follower_wait(struct rcu_data *rdp)
|
||||
static int rcu_nocb_kthread(void *arg)
|
||||
{
|
||||
int c, cl;
|
||||
unsigned long flags;
|
||||
struct rcu_head *list;
|
||||
struct rcu_head *next;
|
||||
struct rcu_head **tail;
|
||||
@@ -2179,11 +2194,14 @@ static int rcu_nocb_kthread(void *arg)
|
||||
nocb_follower_wait(rdp);
|
||||
|
||||
/* Pull the ready-to-invoke callbacks onto local list. */
|
||||
list = READ_ONCE(rdp->nocb_follower_head);
|
||||
raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
|
||||
list = rdp->nocb_follower_head;
|
||||
rdp->nocb_follower_head = NULL;
|
||||
tail = rdp->nocb_follower_tail;
|
||||
rdp->nocb_follower_tail = &rdp->nocb_follower_head;
|
||||
raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
|
||||
BUG_ON(!list);
|
||||
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, "WokeNonEmpty");
|
||||
WRITE_ONCE(rdp->nocb_follower_head, NULL);
|
||||
tail = xchg(&rdp->nocb_follower_tail, &rdp->nocb_follower_head);
|
||||
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WokeNonEmpty"));
|
||||
|
||||
/* Each pass through the following loop invokes a callback. */
|
||||
trace_rcu_batch_start(rdp->rsp->name,
|
||||
@@ -2226,18 +2244,39 @@ static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
|
||||
}
|
||||
|
||||
/* Do a deferred wakeup of rcu_nocb_kthread(). */
|
||||
static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
|
||||
static void do_nocb_deferred_wakeup_common(struct rcu_data *rdp)
|
||||
{
|
||||
unsigned long flags;
|
||||
int ndw;
|
||||
|
||||
if (!rcu_nocb_need_deferred_wakeup(rdp))
|
||||
raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
|
||||
if (!rcu_nocb_need_deferred_wakeup(rdp)) {
|
||||
raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
|
||||
return;
|
||||
}
|
||||
ndw = READ_ONCE(rdp->nocb_defer_wakeup);
|
||||
WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
|
||||
wake_nocb_leader(rdp, ndw == RCU_NOCB_WAKE_FORCE);
|
||||
__wake_nocb_leader(rdp, ndw == RCU_NOCB_WAKE_FORCE, flags);
|
||||
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWake"));
|
||||
}
|
||||
|
||||
/*
 * Timer-handler flavor of the deferred wakeup of rcu_nocb_kthread().
 * The timer argument is the rcu_data pointer cast to unsigned long;
 * recover it and hand it to the common deferred-wakeup path.
 */
static void do_nocb_deferred_wakeup_timer(unsigned long x)
{
	struct rcu_data *rdp = (struct rcu_data *)x;

	do_nocb_deferred_wakeup_common(rdp);
}
|
||||
|
||||
/*
 * Fastpath entry point for the deferred wakeup of rcu_nocb_kthread().
 * The check made here is an inexact common-case test; if it misses,
 * ->nocb_timer will eventually clean things up.
 */
static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
{
	/* Nothing appears to be deferred, so skip the common path. */
	if (!rcu_nocb_need_deferred_wakeup(rdp))
		return;

	do_nocb_deferred_wakeup_common(rdp);
}
|
||||
|
||||
void __init rcu_init_nohz(void)
|
||||
{
|
||||
int cpu;
|
||||
@@ -2287,6 +2326,9 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
|
||||
rdp->nocb_tail = &rdp->nocb_head;
|
||||
init_swait_queue_head(&rdp->nocb_wq);
|
||||
rdp->nocb_follower_tail = &rdp->nocb_follower_head;
|
||||
raw_spin_lock_init(&rdp->nocb_lock);
|
||||
setup_timer(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer,
|
||||
(unsigned long)rdp);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2459,7 +2501,7 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
|
||||
static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_data *my_rdp,
|
||||
struct rcu_data *rdp,
|
||||
unsigned long flags)
|
||||
{
|
||||
|
@@ -568,7 +568,7 @@ static DECLARE_WAIT_QUEUE_HEAD(rcu_tasks_cbs_wq);
|
||||
static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock);
|
||||
|
||||
/* Track exiting tasks in order to allow them to be waited for. */
|
||||
DEFINE_SRCU(tasks_rcu_exit_srcu);
|
||||
DEFINE_STATIC_SRCU(tasks_rcu_exit_srcu);
|
||||
|
||||
/* Control stall timeouts. Disable with <= 0, otherwise jiffies till stall. */
|
||||
#define RCU_TASK_STALL_TIMEOUT (HZ * 60 * 10)
|
||||
@@ -875,6 +875,22 @@ static void rcu_spawn_tasks_kthread(void)
|
||||
mutex_unlock(&rcu_tasks_kthread_mutex);
|
||||
}
|
||||
|
||||
/* Do the srcu_read_lock() for the above synchronize_srcu(). */
|
||||
void exit_tasks_rcu_start(void)
|
||||
{
|
||||
preempt_disable();
|
||||
current->rcu_tasks_idx = __srcu_read_lock(&tasks_rcu_exit_srcu);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
/* Do the srcu_read_unlock() for the above synchronize_srcu(). */
|
||||
void exit_tasks_rcu_finish(void)
|
||||
{
|
||||
preempt_disable();
|
||||
__srcu_read_unlock(&tasks_rcu_exit_srcu, current->rcu_tasks_idx);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
#endif /* #ifdef CONFIG_TASKS_RCU */
|
||||
|
||||
#ifndef CONFIG_TINY_RCU
|
||||
|
@@ -25,3 +25,4 @@ obj-$(CONFIG_SCHED_DEBUG) += debug.o
|
||||
obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o
|
||||
obj-$(CONFIG_CPU_FREQ) += cpufreq.o
|
||||
obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o
|
||||
obj-$(CONFIG_MEMBARRIER) += membarrier.o
|
||||
|
@@ -300,6 +300,8 @@ EXPORT_SYMBOL(try_wait_for_completion);
|
||||
*/
|
||||
bool completion_done(struct completion *x)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
if (!READ_ONCE(x->done))
|
||||
return false;
|
||||
|
||||
@@ -307,14 +309,9 @@ bool completion_done(struct completion *x)
|
||||
* If ->done, we need to wait for complete() to release ->wait.lock
|
||||
* otherwise we can end up freeing the completion before complete()
|
||||
* is done referencing it.
|
||||
*
|
||||
* The RMB pairs with complete()'s RELEASE of ->wait.lock and orders
|
||||
* the loads of ->done and ->wait.lock such that we cannot observe
|
||||
* the lock before complete() acquires it while observing the ->done
|
||||
* after it's acquired the lock.
|
||||
*/
|
||||
smp_rmb();
|
||||
spin_unlock_wait(&x->wait.lock);
|
||||
spin_lock_irqsave(&x->wait.lock, flags);
|
||||
spin_unlock_irqrestore(&x->wait.lock, flags);
|
||||
return true;
|
||||
}
|
||||
EXPORT_SYMBOL(completion_done);
|
||||
|
@@ -951,8 +951,13 @@ struct migration_arg {
|
||||
static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf,
|
||||
struct task_struct *p, int dest_cpu)
|
||||
{
|
||||
if (unlikely(!cpu_active(dest_cpu)))
|
||||
return rq;
|
||||
if (p->flags & PF_KTHREAD) {
|
||||
if (unlikely(!cpu_online(dest_cpu)))
|
||||
return rq;
|
||||
} else {
|
||||
if (unlikely(!cpu_active(dest_cpu)))
|
||||
return rq;
|
||||
}
|
||||
|
||||
/* Affinity changed (again). */
|
||||
if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
|
||||
@@ -2635,6 +2640,16 @@ static struct rq *finish_task_switch(struct task_struct *prev)
|
||||
prev_state = prev->state;
|
||||
vtime_task_switch(prev);
|
||||
perf_event_task_sched_in(prev, current);
|
||||
/*
|
||||
* The membarrier system call requires a full memory barrier
|
||||
* after storing to rq->curr, before going back to user-space.
|
||||
*
|
||||
* TODO: This smp_mb__after_unlock_lock can go away if PPC ends
|
||||
* up adding a full barrier to switch_mm(), or we should figure
|
||||
* out if a smp_mb__after_unlock_lock is really the proper API
|
||||
* to use.
|
||||
*/
|
||||
smp_mb__after_unlock_lock();
|
||||
finish_lock_switch(rq, prev);
|
||||
finish_arch_post_lock_switch();
|
||||
|
||||
@@ -3324,6 +3339,21 @@ static void __sched notrace __schedule(bool preempt)
|
||||
if (likely(prev != next)) {
|
||||
rq->nr_switches++;
|
||||
rq->curr = next;
|
||||
/*
|
||||
* The membarrier system call requires each architecture
|
||||
* to have a full memory barrier after updating
|
||||
* rq->curr, before returning to user-space. For TSO
|
||||
* (e.g. x86), the architecture must provide its own
|
||||
* barrier in switch_mm(). For weakly ordered machines
|
||||
* for which spin_unlock() acts as a full memory
|
||||
* barrier, finish_lock_switch() in common code takes
|
||||
* care of this barrier. For weakly ordered machines for
|
||||
* which spin_unlock() acts as a RELEASE barrier (only
|
||||
* arm64 and PowerPC), arm64 has a full barrier in
|
||||
* switch_to(), and PowerPC has
|
||||
* smp_mb__after_unlock_lock() before
|
||||
* finish_lock_switch().
|
||||
*/
|
||||
++*switch_count;
|
||||
|
||||
trace_sched_switch(preempt, prev, next);
|
||||
@@ -3352,8 +3382,8 @@ void __noreturn do_task_dead(void)
|
||||
* To avoid it, we have to wait for releasing tsk->pi_lock which
|
||||
* is held by try_to_wake_up()
|
||||
*/
|
||||
smp_mb();
|
||||
raw_spin_unlock_wait(&current->pi_lock);
|
||||
raw_spin_lock_irq(&current->pi_lock);
|
||||
raw_spin_unlock_irq(&current->pi_lock);
|
||||
|
||||
/* Causes final put_task_struct in finish_task_switch(): */
|
||||
__set_current_state(TASK_DEAD);
|
||||
|
152
kernel/sched/membarrier.c
Normal file
152
kernel/sched/membarrier.c
Normal file
@@ -0,0 +1,152 @@
|
||||
/*
|
||||
* Copyright (C) 2010-2017 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
|
||||
*
|
||||
* membarrier system call
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/membarrier.h>
|
||||
#include <linux/tick.h>
|
||||
#include <linux/cpumask.h>
|
||||
|
||||
#include "sched.h" /* for cpu_rq(). */
|
||||
|
||||
/*
|
||||
* Bitmask made from an "or" of all commands within enum membarrier_cmd,
|
||||
* except MEMBARRIER_CMD_QUERY.
|
||||
*/
|
||||
#define MEMBARRIER_CMD_BITMASK \
|
||||
(MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED)
|
||||
|
||||
/*
 * IPI callback used by membarrier_private_expedited(): executes a full
 * memory barrier on the CPU that runs it.  @info is not used.
 */
static void ipi_mb(void *info)
|
||||
{
|
||||
smp_mb(); /* IPIs should be serializing but paranoid. */
|
||||
}
|
||||
|
||||
static void membarrier_private_expedited(void)
|
||||
{
|
||||
int cpu;
|
||||
bool fallback = false;
|
||||
cpumask_var_t tmpmask;
|
||||
|
||||
if (num_online_cpus() == 1)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Matches memory barriers around rq->curr modification in
|
||||
* scheduler.
|
||||
*/
|
||||
smp_mb(); /* system call entry is not a mb. */
|
||||
|
||||
/*
|
||||
* Expedited membarrier commands guarantee that they won't
|
||||
* block, hence the GFP_NOWAIT allocation flag and fallback
|
||||
* implementation.
|
||||
*/
|
||||
if (!zalloc_cpumask_var(&tmpmask, GFP_NOWAIT)) {
|
||||
/* Fallback for OOM. */
|
||||
fallback = true;
|
||||
}
|
||||
|
||||
cpus_read_lock();
|
||||
for_each_online_cpu(cpu) {
|
||||
struct task_struct *p;
|
||||
|
||||
/*
|
||||
* Skipping the current CPU is OK even through we can be
|
||||
* migrated at any point. The current CPU, at the point
|
||||
* where we read raw_smp_processor_id(), is ensured to
|
||||
* be in program order with respect to the caller
|
||||
* thread. Therefore, we can skip this CPU from the
|
||||
* iteration.
|
||||
*/
|
||||
if (cpu == raw_smp_processor_id())
|
||||
continue;
|
||||
rcu_read_lock();
|
||||
p = task_rcu_dereference(&cpu_rq(cpu)->curr);
|
||||
if (p && p->mm == current->mm) {
|
||||
if (!fallback)
|
||||
__cpumask_set_cpu(cpu, tmpmask);
|
||||
else
|
||||
smp_call_function_single(cpu, ipi_mb, NULL, 1);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
if (!fallback) {
|
||||
smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
|
||||
free_cpumask_var(tmpmask);
|
||||
}
|
||||
cpus_read_unlock();
|
||||
|
||||
/*
|
||||
* Memory barrier on the caller thread _after_ we finished
|
||||
* waiting for the last IPI. Matches memory barriers around
|
||||
* rq->curr modification in scheduler.
|
||||
*/
|
||||
smp_mb(); /* exit from system call is not a mb */
|
||||
}
|
||||
|
||||
/**
|
||||
* sys_membarrier - issue memory barriers on a set of threads
|
||||
* @cmd: Takes command values defined in enum membarrier_cmd.
|
||||
* @flags: Currently needs to be 0. For future extensions.
|
||||
*
|
||||
* If this system call is not implemented, -ENOSYS is returned. If the
|
||||
* command specified does not exist, is not available on the running
|
||||
* kernel, or if the command argument is invalid, this system call
|
||||
* returns -EINVAL. For a given command, with flags argument set to 0,
|
||||
* this system call is guaranteed to always return the same value until
|
||||
* reboot.
|
||||
*
|
||||
* All memory accesses performed in program order from each targeted thread
|
||||
* are guaranteed to be ordered with respect to sys_membarrier(). If we use
|
||||
* the semantic "barrier()" to represent a compiler barrier forcing memory
|
||||
* accesses to be performed in program order across the barrier, and
|
||||
* smp_mb() to represent explicit memory barriers forcing full memory
|
||||
* ordering across the barrier, we have the following ordering table for
|
||||
* each pair of barrier(), sys_membarrier() and smp_mb():
|
||||
*
|
||||
* The pair ordering is detailed as (O: ordered, X: not ordered):
|
||||
*
|
||||
* barrier() smp_mb() sys_membarrier()
|
||||
* barrier() X X O
|
||||
* smp_mb() X O O
|
||||
* sys_membarrier() O O O
|
||||
*/
|
||||
SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
{
	int supported;

	/* Any nonzero flags value is rejected. */
	if (unlikely(flags))
		return -EINVAL;

	if (cmd == MEMBARRIER_CMD_QUERY) {
		/*
		 * Report the bitmask of commands, leaving out
		 * MEMBARRIER_CMD_SHARED when nohz_full is enabled.
		 */
		supported = MEMBARRIER_CMD_BITMASK;
		if (tick_nohz_full_enabled())
			supported &= ~MEMBARRIER_CMD_SHARED;
		return supported;
	}

	if (cmd == MEMBARRIER_CMD_SHARED) {
		/* MEMBARRIER_CMD_SHARED is not compatible with nohz_full. */
		if (tick_nohz_full_enabled())
			return -EINVAL;
		/* The grace-period wait is skipped with one CPU online. */
		if (num_online_cpus() > 1)
			synchronize_sched();
		return 0;
	}

	if (cmd == MEMBARRIER_CMD_PRIVATE_EXPEDITED) {
		membarrier_private_expedited();
		return 0;
	}

	/* Unknown command. */
	return -EINVAL;
}
|
@@ -96,20 +96,16 @@ void task_work_run(void)
|
||||
* work->func() can do task_work_add(), do not set
|
||||
* work_exited unless the list is empty.
|
||||
*/
|
||||
raw_spin_lock_irq(&task->pi_lock);
|
||||
do {
|
||||
work = READ_ONCE(task->task_works);
|
||||
head = !work && (task->flags & PF_EXITING) ?
|
||||
&work_exited : NULL;
|
||||
} while (cmpxchg(&task->task_works, work, head) != work);
|
||||
raw_spin_unlock_irq(&task->pi_lock);
|
||||
|
||||
if (!work)
|
||||
break;
|
||||
/*
|
||||
* Synchronize with task_work_cancel(). It can't remove
|
||||
* the first entry == work, cmpxchg(task_works) should
|
||||
* fail, but it can play with *work and other entries.
|
||||
*/
|
||||
raw_spin_unlock_wait(&task->pi_lock);
|
||||
|
||||
do {
|
||||
next = work->next;
|
||||
|
@@ -117,7 +117,7 @@ bool torture_offline(int cpu, long *n_offl_attempts, long *n_offl_successes,
|
||||
torture_type, cpu);
|
||||
(*n_offl_successes)++;
|
||||
delta = jiffies - starttime;
|
||||
sum_offl += delta;
|
||||
*sum_offl += delta;
|
||||
if (*min_offl < 0) {
|
||||
*min_offl = delta;
|
||||
*max_offl = delta;
|
||||
|
Reference in New Issue
Block a user