Merge branches 'cond_resched.2017.12.04a', 'dyntick.2017.11.28a', 'fixes.2017.12.11a', 'srbd.2017.12.05a' and 'torture.2017.12.11a' into HEAD

cond_resched.2017.12.04a: Convert cond_resched_rcu_qs() to cond_resched()
dyntick.2017.11.28a: Make RCU dynticks handle interrupts from NMI
fixes.2017.12.11a: Miscellaneous fixes
srbd.2017.12.05a: Remove now-redundant smp_read_barrier_depends()
torture.2017.12.11a: Torture-testing update
Paul E. McKenney
2017-12-11 09:21:58 -08:00
72 changed files with 482 additions and 729 deletions

View File

@@ -30,31 +30,8 @@
#define RCU_TRACE(stmt)
#endif /* #else #ifdef CONFIG_RCU_TRACE */
/*
* Process-level increment to ->dynticks_nesting field. This allows for
* architectures that use half-interrupts and half-exceptions from
* process context.
*
* DYNTICK_TASK_NEST_MASK defines a field of width DYNTICK_TASK_NEST_WIDTH
* that counts the number of process-based reasons why RCU cannot
* consider the corresponding CPU to be idle, and DYNTICK_TASK_NEST_VALUE
* is the value used to increment or decrement this field.
*
* The rest of the bits could in principle be used to count interrupts,
* but this would mean that a negative-one value in the interrupt
* field could incorrectly zero out the DYNTICK_TASK_NEST_MASK field.
* We therefore provide a two-bit guard field defined by DYNTICK_TASK_MASK
* that is set to DYNTICK_TASK_FLAG upon initial exit from idle.
* The DYNTICK_TASK_EXIT_IDLE value is thus the combined value used upon
* initial exit from idle.
*/
#define DYNTICK_TASK_NEST_WIDTH 7
#define DYNTICK_TASK_NEST_VALUE ((LLONG_MAX >> DYNTICK_TASK_NEST_WIDTH) + 1)
#define DYNTICK_TASK_NEST_MASK (LLONG_MAX - DYNTICK_TASK_NEST_VALUE + 1)
#define DYNTICK_TASK_FLAG ((DYNTICK_TASK_NEST_VALUE / 8) * 2)
#define DYNTICK_TASK_MASK ((DYNTICK_TASK_NEST_VALUE / 8) * 3)
#define DYNTICK_TASK_EXIT_IDLE (DYNTICK_TASK_NEST_VALUE + \
DYNTICK_TASK_FLAG)
/* Offset to allow for unmatched rcu_irq_{enter,exit}(). */
#define DYNTICK_IRQ_NONIDLE ((LONG_MAX / 2) + 1)
/*

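The dyntick.2017.11.28a branch replaces the DYNTICK_TASK_* bit-field encoding removed above with two plain counters: ->dynticks_nesting counts process-level non-idle reasons (0 means idle at process level), while ->dynticks_nmi_nesting tracks irq/NMI nesting and is biased by DYNTICK_IRQ_NONIDLE whenever the CPU is non-idle, per the "unmatched rcu_irq_{enter,exit}()" comment above. Below is a minimal userspace sketch of that invariant; the toy_* names are hypothetical, and this is only a loose model of the rcu_eqs_enter()/rcu_eqs_exit() code shown later in this diff, not kernel code.

#include <limits.h>
#include <stdio.h>

#define DYNTICK_IRQ_NONIDLE ((LONG_MAX / 2) + 1)

struct toy_dynticks {
	long nesting;		/* process-level nesting, 0 == idle */
	long nmi_nesting;	/* irq/NMI nesting, biased while non-idle */
};

static void toy_eqs_enter(struct toy_dynticks *d)	/* cf. rcu_eqs_enter() */
{
	d->nmi_nesting = 0;		/* crowbar: forget any stale irq count */
	if (d->nesting != 1) {
		d->nesting--;		/* still non-idle for another reason */
		return;
	}
	d->nesting = 0;			/* now idle from RCU's perspective */
}

static void toy_eqs_exit(struct toy_dynticks *d)	/* cf. rcu_eqs_exit() */
{
	if (d->nesting) {
		d->nesting++;		/* already non-idle */
		return;
	}
	d->nesting = 1;
	d->nmi_nesting = DYNTICK_IRQ_NONIDLE;	/* re-bias the irq/NMI counter */
}

int main(void)
{
	struct toy_dynticks d = { .nesting = 1, .nmi_nesting = DYNTICK_IRQ_NONIDLE };

	toy_eqs_enter(&d);
	printf("idle: nesting=%ld nmi_nesting=%ld\n", d.nesting, d.nmi_nesting);
	toy_eqs_exit(&d);
	printf("busy: nesting=%ld nmi_nesting=%ld\n", d.nesting, d.nmi_nesting);
	return 0;
}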
View File

@@ -106,10 +106,6 @@ static int rcu_perf_writer_state;
#define MAX_MEAS 10000
#define MIN_MEAS 100
static int perf_runnable = IS_ENABLED(MODULE);
module_param(perf_runnable, int, 0444);
MODULE_PARM_DESC(perf_runnable, "Start rcuperf at boot");
/*
* Operations vector for selecting different types of tests.
*/
@@ -646,7 +642,7 @@ rcu_perf_init(void)
&tasks_ops,
};
if (!torture_init_begin(perf_type, verbose, &perf_runnable))
if (!torture_init_begin(perf_type, verbose))
return -EBUSY;
/* Process args and tell the world that the perf'er is on the job. */

View File

@@ -187,10 +187,6 @@ static const char *rcu_torture_writer_state_getname(void)
return rcu_torture_writer_state_names[i];
}
static int torture_runnable = IS_ENABLED(MODULE);
module_param(torture_runnable, int, 0444);
MODULE_PARM_DESC(torture_runnable, "Start rcutorture at boot");
#if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU)
#define rcu_can_boost() 1
#else /* #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */
@@ -315,11 +311,9 @@ static void rcu_read_delay(struct torture_random_state *rrsp)
}
if (!(torture_random(rrsp) % (nrealreaders * 2 * shortdelay_us)))
udelay(shortdelay_us);
#ifdef CONFIG_PREEMPT
if (!preempt_count() &&
!(torture_random(rrsp) % (nrealreaders * 20000)))
preempt_schedule(); /* No QS if preempt_disable() in effect */
#endif
!(torture_random(rrsp) % (nrealreaders * 500)))
torture_preempt_schedule(); /* QS only if preemptible. */
}
static void rcu_torture_read_unlock(int idx) __releases(RCU)
@@ -1731,7 +1725,7 @@ rcu_torture_init(void)
&sched_ops, &tasks_ops,
};
if (!torture_init_begin(torture_type, verbose, &torture_runnable))
if (!torture_init_begin(torture_type, verbose))
return -EBUSY;
/* Process args and tell the world that the torturer is on the job. */
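The rcu_read_delay() hunk above now calls torture_preempt_schedule() unconditionally instead of open-coding a CONFIG_PREEMPT #ifdef around preempt_schedule(). The wrapper's definition is not part of this diff; a plausible sketch of its shape, stated as an assumption rather than quoted from the torture branch (it presumably lives in include/linux/torture.h), would be:

/* Assumed shape of the wrapper -- not quoted from this diff: */
#ifdef CONFIG_PREEMPT
#define torture_preempt_schedule()	preempt_schedule()
#else
#define torture_preempt_schedule()	do { } while (0)
#endif

Either way, the hunk's comment ("QS only if preemptible.") keeps its meaning: on non-preemptible kernels the call compiles away.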

View File

@@ -53,6 +53,33 @@ static void srcu_invoke_callbacks(struct work_struct *work);
static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay);
static void process_srcu(struct work_struct *work);
/* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */
#define spin_lock_rcu_node(p) \
do { \
spin_lock(&ACCESS_PRIVATE(p, lock)); \
smp_mb__after_unlock_lock(); \
} while (0)
#define spin_unlock_rcu_node(p) spin_unlock(&ACCESS_PRIVATE(p, lock))
#define spin_lock_irq_rcu_node(p) \
do { \
spin_lock_irq(&ACCESS_PRIVATE(p, lock)); \
smp_mb__after_unlock_lock(); \
} while (0)
#define spin_unlock_irq_rcu_node(p) \
spin_unlock_irq(&ACCESS_PRIVATE(p, lock))
#define spin_lock_irqsave_rcu_node(p, flags) \
do { \
spin_lock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \
smp_mb__after_unlock_lock(); \
} while (0)
#define spin_unlock_irqrestore_rcu_node(p, flags) \
spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags)
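These wrappers keep the ->lock fields private (via ACCESS_PRIVATE()) and add smp_mb__after_unlock_lock(), so each acquisition also acts as a full memory barrier, which the __synchronize_srcu() ordering comment later in this diff relies on. A hedged sketch of the calling pattern, with a hypothetical critical section:

static void example_srcu_update(struct srcu_data *sdp)
{
	unsigned long flags;

	spin_lock_irqsave_rcu_node(sdp, flags);	/* lock + full memory barrier */
	/* ... hypothetical updates to sdp fields go here ... */
	spin_unlock_irqrestore_rcu_node(sdp, flags);
}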
/*
* Initialize SRCU combining tree. Note that statically allocated
* srcu_struct structures might already have srcu_read_lock() and
@@ -77,7 +104,7 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static)
/* Each pass through this loop initializes one srcu_node structure. */
rcu_for_each_node_breadth_first(sp, snp) {
raw_spin_lock_init(&ACCESS_PRIVATE(snp, lock));
spin_lock_init(&ACCESS_PRIVATE(snp, lock));
WARN_ON_ONCE(ARRAY_SIZE(snp->srcu_have_cbs) !=
ARRAY_SIZE(snp->srcu_data_have_cbs));
for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) {
@@ -111,7 +138,7 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static)
snp_first = sp->level[level];
for_each_possible_cpu(cpu) {
sdp = per_cpu_ptr(sp->sda, cpu);
raw_spin_lock_init(&ACCESS_PRIVATE(sdp, lock));
spin_lock_init(&ACCESS_PRIVATE(sdp, lock));
rcu_segcblist_init(&sdp->srcu_cblist);
sdp->srcu_cblist_invoking = false;
sdp->srcu_gp_seq_needed = sp->srcu_gp_seq;
@@ -170,7 +197,7 @@ int __init_srcu_struct(struct srcu_struct *sp, const char *name,
/* Don't re-initialize a lock while it is held. */
debug_check_no_locks_freed((void *)sp, sizeof(*sp));
lockdep_init_map(&sp->dep_map, name, key, 0);
raw_spin_lock_init(&ACCESS_PRIVATE(sp, lock));
spin_lock_init(&ACCESS_PRIVATE(sp, lock));
return init_srcu_struct_fields(sp, false);
}
EXPORT_SYMBOL_GPL(__init_srcu_struct);
@@ -187,7 +214,7 @@ EXPORT_SYMBOL_GPL(__init_srcu_struct);
*/
int init_srcu_struct(struct srcu_struct *sp)
{
raw_spin_lock_init(&ACCESS_PRIVATE(sp, lock));
spin_lock_init(&ACCESS_PRIVATE(sp, lock));
return init_srcu_struct_fields(sp, false);
}
EXPORT_SYMBOL_GPL(init_srcu_struct);
@@ -210,13 +237,13 @@ static void check_init_srcu_struct(struct srcu_struct *sp)
/* The smp_load_acquire() pairs with the smp_store_release(). */
if (!rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq_needed))) /*^^^*/
return; /* Already initialized. */
raw_spin_lock_irqsave_rcu_node(sp, flags);
spin_lock_irqsave_rcu_node(sp, flags);
if (!rcu_seq_state(sp->srcu_gp_seq_needed)) {
raw_spin_unlock_irqrestore_rcu_node(sp, flags);
spin_unlock_irqrestore_rcu_node(sp, flags);
return;
}
init_srcu_struct_fields(sp, true);
raw_spin_unlock_irqrestore_rcu_node(sp, flags);
spin_unlock_irqrestore_rcu_node(sp, flags);
}
/*
@@ -513,7 +540,7 @@ static void srcu_gp_end(struct srcu_struct *sp)
mutex_lock(&sp->srcu_cb_mutex);
/* End the current grace period. */
raw_spin_lock_irq_rcu_node(sp);
spin_lock_irq_rcu_node(sp);
idx = rcu_seq_state(sp->srcu_gp_seq);
WARN_ON_ONCE(idx != SRCU_STATE_SCAN2);
cbdelay = srcu_get_delay(sp);
@@ -522,7 +549,7 @@ static void srcu_gp_end(struct srcu_struct *sp)
gpseq = rcu_seq_current(&sp->srcu_gp_seq);
if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, gpseq))
sp->srcu_gp_seq_needed_exp = gpseq;
raw_spin_unlock_irq_rcu_node(sp);
spin_unlock_irq_rcu_node(sp);
mutex_unlock(&sp->srcu_gp_mutex);
/* A new grace period can start at this point. But only one. */
@@ -530,7 +557,7 @@ static void srcu_gp_end(struct srcu_struct *sp)
idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs);
idxnext = (idx + 1) % ARRAY_SIZE(snp->srcu_have_cbs);
rcu_for_each_node_breadth_first(sp, snp) {
raw_spin_lock_irq_rcu_node(snp);
spin_lock_irq_rcu_node(snp);
cbs = false;
if (snp >= sp->level[rcu_num_lvls - 1])
cbs = snp->srcu_have_cbs[idx] == gpseq;
@@ -540,7 +567,7 @@ static void srcu_gp_end(struct srcu_struct *sp)
snp->srcu_gp_seq_needed_exp = gpseq;
mask = snp->srcu_data_have_cbs[idx];
snp->srcu_data_have_cbs[idx] = 0;
raw_spin_unlock_irq_rcu_node(snp);
spin_unlock_irq_rcu_node(snp);
if (cbs)
srcu_schedule_cbs_snp(sp, snp, mask, cbdelay);
@@ -548,11 +575,11 @@ static void srcu_gp_end(struct srcu_struct *sp)
if (!(gpseq & counter_wrap_check))
for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) {
sdp = per_cpu_ptr(sp->sda, cpu);
raw_spin_lock_irqsave_rcu_node(sdp, flags);
spin_lock_irqsave_rcu_node(sdp, flags);
if (ULONG_CMP_GE(gpseq,
sdp->srcu_gp_seq_needed + 100))
sdp->srcu_gp_seq_needed = gpseq;
raw_spin_unlock_irqrestore_rcu_node(sdp, flags);
spin_unlock_irqrestore_rcu_node(sdp, flags);
}
}
@@ -560,17 +587,17 @@ static void srcu_gp_end(struct srcu_struct *sp)
mutex_unlock(&sp->srcu_cb_mutex);
/* Start a new grace period if needed. */
raw_spin_lock_irq_rcu_node(sp);
spin_lock_irq_rcu_node(sp);
gpseq = rcu_seq_current(&sp->srcu_gp_seq);
if (!rcu_seq_state(gpseq) &&
ULONG_CMP_LT(gpseq, sp->srcu_gp_seq_needed)) {
srcu_gp_start(sp);
raw_spin_unlock_irq_rcu_node(sp);
spin_unlock_irq_rcu_node(sp);
/* Throttle expedited grace periods: Should be rare! */
srcu_reschedule(sp, rcu_seq_ctr(gpseq) & 0x3ff
? 0 : SRCU_INTERVAL);
} else {
raw_spin_unlock_irq_rcu_node(sp);
spin_unlock_irq_rcu_node(sp);
}
}
@@ -590,18 +617,18 @@ static void srcu_funnel_exp_start(struct srcu_struct *sp, struct srcu_node *snp,
if (rcu_seq_done(&sp->srcu_gp_seq, s) ||
ULONG_CMP_GE(READ_ONCE(snp->srcu_gp_seq_needed_exp), s))
return;
raw_spin_lock_irqsave_rcu_node(snp, flags);
spin_lock_irqsave_rcu_node(snp, flags);
if (ULONG_CMP_GE(snp->srcu_gp_seq_needed_exp, s)) {
raw_spin_unlock_irqrestore_rcu_node(snp, flags);
spin_unlock_irqrestore_rcu_node(snp, flags);
return;
}
WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
raw_spin_unlock_irqrestore_rcu_node(snp, flags);
spin_unlock_irqrestore_rcu_node(snp, flags);
}
raw_spin_lock_irqsave_rcu_node(sp, flags);
spin_lock_irqsave_rcu_node(sp, flags);
if (!ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s))
sp->srcu_gp_seq_needed_exp = s;
raw_spin_unlock_irqrestore_rcu_node(sp, flags);
spin_unlock_irqrestore_rcu_node(sp, flags);
}
/*
@@ -623,12 +650,12 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp,
for (; snp != NULL; snp = snp->srcu_parent) {
if (rcu_seq_done(&sp->srcu_gp_seq, s) && snp != sdp->mynode)
return; /* GP already done and CBs recorded. */
raw_spin_lock_irqsave_rcu_node(snp, flags);
spin_lock_irqsave_rcu_node(snp, flags);
if (ULONG_CMP_GE(snp->srcu_have_cbs[idx], s)) {
snp_seq = snp->srcu_have_cbs[idx];
if (snp == sdp->mynode && snp_seq == s)
snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
raw_spin_unlock_irqrestore_rcu_node(snp, flags);
spin_unlock_irqrestore_rcu_node(snp, flags);
if (snp == sdp->mynode && snp_seq != s) {
srcu_schedule_cbs_sdp(sdp, do_norm
? SRCU_INTERVAL
@@ -644,11 +671,11 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp,
snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
if (!do_norm && ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, s))
snp->srcu_gp_seq_needed_exp = s;
raw_spin_unlock_irqrestore_rcu_node(snp, flags);
spin_unlock_irqrestore_rcu_node(snp, flags);
}
/* Top of tree, must ensure the grace period will be started. */
raw_spin_lock_irqsave_rcu_node(sp, flags);
spin_lock_irqsave_rcu_node(sp, flags);
if (ULONG_CMP_LT(sp->srcu_gp_seq_needed, s)) {
/*
* Record need for grace period s. Pair with load
@@ -667,7 +694,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp,
queue_delayed_work(system_power_efficient_wq, &sp->work,
srcu_get_delay(sp));
}
raw_spin_unlock_irqrestore_rcu_node(sp, flags);
spin_unlock_irqrestore_rcu_node(sp, flags);
}
/*
@@ -830,7 +857,7 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
rhp->func = func;
local_irq_save(flags);
sdp = this_cpu_ptr(sp->sda);
raw_spin_lock_rcu_node(sdp);
spin_lock_rcu_node(sdp);
rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp, false);
rcu_segcblist_advance(&sdp->srcu_cblist,
rcu_seq_current(&sp->srcu_gp_seq));
@@ -844,7 +871,7 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
sdp->srcu_gp_seq_needed_exp = s;
needexp = true;
}
raw_spin_unlock_irqrestore_rcu_node(sdp, flags);
spin_unlock_irqrestore_rcu_node(sdp, flags);
if (needgp)
srcu_funnel_gp_start(sp, sdp, s, do_norm);
else if (needexp)
@@ -900,7 +927,7 @@ static void __synchronize_srcu(struct srcu_struct *sp, bool do_norm)
/*
* Make sure that later code is ordered after the SRCU grace
* period. This pairs with the raw_spin_lock_irq_rcu_node()
* period. This pairs with the spin_lock_irq_rcu_node()
* in srcu_invoke_callbacks(). Unlike Tree RCU, this is needed
* because the current CPU might have been totally uninvolved with
* (and thus unordered against) that grace period.
@@ -1024,7 +1051,7 @@ void srcu_barrier(struct srcu_struct *sp)
*/
for_each_possible_cpu(cpu) {
sdp = per_cpu_ptr(sp->sda, cpu);
raw_spin_lock_irq_rcu_node(sdp);
spin_lock_irq_rcu_node(sdp);
atomic_inc(&sp->srcu_barrier_cpu_cnt);
sdp->srcu_barrier_head.func = srcu_barrier_cb;
debug_rcu_head_queue(&sdp->srcu_barrier_head);
@@ -1033,7 +1060,7 @@ void srcu_barrier(struct srcu_struct *sp)
debug_rcu_head_unqueue(&sdp->srcu_barrier_head);
atomic_dec(&sp->srcu_barrier_cpu_cnt);
}
raw_spin_unlock_irq_rcu_node(sdp);
spin_unlock_irq_rcu_node(sdp);
}
/* Remove the initial count, at which point reaching zero can happen. */
@@ -1082,17 +1109,17 @@ static void srcu_advance_state(struct srcu_struct *sp)
*/
idx = rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq)); /* ^^^ */
if (idx == SRCU_STATE_IDLE) {
raw_spin_lock_irq_rcu_node(sp);
spin_lock_irq_rcu_node(sp);
if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) {
WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq));
raw_spin_unlock_irq_rcu_node(sp);
spin_unlock_irq_rcu_node(sp);
mutex_unlock(&sp->srcu_gp_mutex);
return;
}
idx = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq));
if (idx == SRCU_STATE_IDLE)
srcu_gp_start(sp);
raw_spin_unlock_irq_rcu_node(sp);
spin_unlock_irq_rcu_node(sp);
if (idx != SRCU_STATE_IDLE) {
mutex_unlock(&sp->srcu_gp_mutex);
return; /* Someone else started the grace period. */
@@ -1141,19 +1168,19 @@ static void srcu_invoke_callbacks(struct work_struct *work)
sdp = container_of(work, struct srcu_data, work.work);
sp = sdp->sp;
rcu_cblist_init(&ready_cbs);
raw_spin_lock_irq_rcu_node(sdp);
spin_lock_irq_rcu_node(sdp);
rcu_segcblist_advance(&sdp->srcu_cblist,
rcu_seq_current(&sp->srcu_gp_seq));
if (sdp->srcu_cblist_invoking ||
!rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) {
raw_spin_unlock_irq_rcu_node(sdp);
spin_unlock_irq_rcu_node(sdp);
return; /* Someone else on the job or nothing to do. */
}
/* We are on the job! Extract and invoke ready callbacks. */
sdp->srcu_cblist_invoking = true;
rcu_segcblist_extract_done_cbs(&sdp->srcu_cblist, &ready_cbs);
raw_spin_unlock_irq_rcu_node(sdp);
spin_unlock_irq_rcu_node(sdp);
rhp = rcu_cblist_dequeue(&ready_cbs);
for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
debug_rcu_head_unqueue(rhp);
@@ -1166,13 +1193,13 @@ static void srcu_invoke_callbacks(struct work_struct *work)
* Update counts, accelerate new callbacks, and if needed,
* schedule another round of callback invocation.
*/
raw_spin_lock_irq_rcu_node(sdp);
spin_lock_irq_rcu_node(sdp);
rcu_segcblist_insert_count(&sdp->srcu_cblist, &ready_cbs);
(void)rcu_segcblist_accelerate(&sdp->srcu_cblist,
rcu_seq_snap(&sp->srcu_gp_seq));
sdp->srcu_cblist_invoking = false;
more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist);
raw_spin_unlock_irq_rcu_node(sdp);
spin_unlock_irq_rcu_node(sdp);
if (more)
srcu_schedule_cbs_sdp(sdp, 0);
}
@@ -1185,7 +1212,7 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay)
{
bool pushgp = true;
raw_spin_lock_irq_rcu_node(sp);
spin_lock_irq_rcu_node(sp);
if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) {
if (!WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq))) {
/* All requests fulfilled, time to go idle. */
@@ -1195,7 +1222,7 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay)
/* Outstanding request and no GP. Start one. */
srcu_gp_start(sp);
}
raw_spin_unlock_irq_rcu_node(sp);
spin_unlock_irq_rcu_node(sp);
if (pushgp)
queue_delayed_work(system_power_efficient_wq, &sp->work, delay);

View File

@@ -265,24 +265,11 @@ void rcu_bh_qs(void)
#endif
static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
.dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
.dynticks_nesting = 1,
.dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE,
.dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR),
};
/*
* There are a few places, currently just in the tracing infrastructure,
* that use rcu_irq_enter() to make sure RCU is watching. But there's
* a small location where that will not even work. In those cases
* rcu_irq_enter_disabled() needs to be checked to make sure rcu_irq_enter()
* can be called.
*/
static DEFINE_PER_CPU(bool, disable_rcu_irq_enter);
bool rcu_irq_enter_disabled(void)
{
return this_cpu_read(disable_rcu_irq_enter);
}
/*
* Record entry into an extended quiescent state. This is only to be
* called when not already in an extended quiescent state.
@@ -762,68 +749,39 @@ cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
}
/*
* rcu_eqs_enter_common - current CPU is entering an extended quiescent state
* Enter an RCU extended quiescent state, which can be either the
* idle loop or adaptive-tickless usermode execution.
*
* Enter idle, doing appropriate accounting. The caller must have
* disabled interrupts.
* We crowbar the ->dynticks_nmi_nesting field to zero to allow for
* the possibility of usermode upcalls having messed up our count
* of interrupt nesting level during the prior busy period.
*/
static void rcu_eqs_enter_common(bool user)
static void rcu_eqs_enter(bool user)
{
struct rcu_state *rsp;
struct rcu_data *rdp;
struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
struct rcu_dynticks *rdtp;
rdtp = this_cpu_ptr(&rcu_dynticks);
WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0);
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
rdtp->dynticks_nesting == 0);
if (rdtp->dynticks_nesting != 1) {
rdtp->dynticks_nesting--;
return;
}
lockdep_assert_irqs_disabled();
trace_rcu_dyntick(TPS("Start"), rdtp->dynticks_nesting, 0);
if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
!user && !is_idle_task(current)) {
struct task_struct *idle __maybe_unused =
idle_task(smp_processor_id());
trace_rcu_dyntick(TPS("Error on entry: not idle task"), rdtp->dynticks_nesting, 0);
rcu_ftrace_dump(DUMP_ORIG);
WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
current->pid, current->comm,
idle->pid, idle->comm); /* must be idle task! */
}
trace_rcu_dyntick(TPS("Start"), rdtp->dynticks_nesting, 0, rdtp->dynticks);
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
for_each_rcu_flavor(rsp) {
rdp = this_cpu_ptr(rsp->rda);
do_nocb_deferred_wakeup(rdp);
}
rcu_prepare_for_idle();
__this_cpu_inc(disable_rcu_irq_enter);
rdtp->dynticks_nesting = 0; /* Breaks tracing momentarily. */
rcu_dynticks_eqs_enter(); /* After this, tracing works again. */
__this_cpu_dec(disable_rcu_irq_enter);
WRITE_ONCE(rdtp->dynticks_nesting, 0); /* Avoid irq-access tearing. */
rcu_dynticks_eqs_enter();
rcu_dynticks_task_enter();
/*
* It is illegal to enter an extended quiescent state while
* in an RCU read-side critical section.
*/
RCU_LOCKDEP_WARN(lock_is_held(&rcu_lock_map),
"Illegal idle entry in RCU read-side critical section.");
RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map),
"Illegal idle entry in RCU-bh read-side critical section.");
RCU_LOCKDEP_WARN(lock_is_held(&rcu_sched_lock_map),
"Illegal idle entry in RCU-sched read-side critical section.");
}
/*
* Enter an RCU extended quiescent state, which can be either the
* idle loop or adaptive-tickless usermode execution.
*/
static void rcu_eqs_enter(bool user)
{
struct rcu_dynticks *rdtp;
rdtp = this_cpu_ptr(&rcu_dynticks);
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
(rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK) == 0);
if ((rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE)
rcu_eqs_enter_common(user);
else
rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE;
}
/**
@@ -834,10 +792,6 @@ static void rcu_eqs_enter(bool user)
* critical sections can occur in irq handlers in idle, a possibility
* handled by irq_enter() and irq_exit().)
*
* We crowbar the ->dynticks_nesting field to zero to allow for
* the possibility of usermode upcalls having messed up our count
* of interrupt nesting level during the prior busy period.
*
* If you add or remove a call to rcu_idle_enter(), be sure to test with
* CONFIG_RCU_EQS_DEBUG=y.
*/
@@ -866,6 +820,46 @@ void rcu_user_enter(void)
}
#endif /* CONFIG_NO_HZ_FULL */
/**
* rcu_nmi_exit - inform RCU of exit from NMI context
*
* If we are returning from the outermost NMI handler that interrupted an
* RCU-idle period, update rdtp->dynticks and rdtp->dynticks_nmi_nesting
* to let the RCU grace-period handling know that the CPU is back to
* being RCU-idle.
*
* If you add or remove a call to rcu_nmi_exit(), be sure to test
* with CONFIG_RCU_EQS_DEBUG=y.
*/
void rcu_nmi_exit(void)
{
struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
/*
* Check for ->dynticks_nmi_nesting underflow and bad ->dynticks.
* (We are exiting an NMI handler, so RCU better be paying attention
* to us!)
*/
WARN_ON_ONCE(rdtp->dynticks_nmi_nesting <= 0);
WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs());
/*
* If the nesting level is not 1, the CPU wasn't RCU-idle, so
* leave it in non-RCU-idle state.
*/
if (rdtp->dynticks_nmi_nesting != 1) {
trace_rcu_dyntick(TPS("--="), rdtp->dynticks_nmi_nesting, rdtp->dynticks_nmi_nesting - 2, rdtp->dynticks);
WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* No store tearing. */
rdtp->dynticks_nmi_nesting - 2);
return;
}
/* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
trace_rcu_dyntick(TPS("Startirq"), rdtp->dynticks_nmi_nesting, 0, rdtp->dynticks);
WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */
rcu_dynticks_eqs_enter();
}
/**
* rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle
*
@@ -875,8 +869,8 @@ void rcu_user_enter(void)
*
* This code assumes that the idle loop never does anything that might
* result in unbalanced calls to irq_enter() and irq_exit(). If your
* architecture violates this assumption, RCU will give you what you
* deserve, good and hard. But very infrequently and irreproducibly.
* architecture's idle loop violates this assumption, RCU will give you what
* you deserve, good and hard. But very infrequently and irreproducibly.
*
* Use things like work queues to work around this limitation.
*
@@ -887,23 +881,14 @@ void rcu_user_enter(void)
*/
void rcu_irq_exit(void)
{
struct rcu_dynticks *rdtp;
struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
lockdep_assert_irqs_disabled();
rdtp = this_cpu_ptr(&rcu_dynticks);
/* Page faults can happen in NMI handlers, so check... */
if (rdtp->dynticks_nmi_nesting)
return;
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
rdtp->dynticks_nesting < 1);
if (rdtp->dynticks_nesting <= 1) {
rcu_eqs_enter_common(true);
} else {
trace_rcu_dyntick(TPS("--="), rdtp->dynticks_nesting, rdtp->dynticks_nesting - 1);
rdtp->dynticks_nesting--;
}
if (rdtp->dynticks_nmi_nesting == 1)
rcu_prepare_for_idle();
rcu_nmi_exit();
if (rdtp->dynticks_nmi_nesting == 0)
rcu_dynticks_task_enter();
}
/*
@@ -921,56 +906,34 @@ void rcu_irq_exit_irqson(void)
local_irq_restore(flags);
}
/*
* rcu_eqs_exit_common - current CPU moving away from extended quiescent state
*
* If the new value of the ->dynticks_nesting counter was previously zero,
* we really have exited idle, and must do the appropriate accounting.
* The caller must have disabled interrupts.
*/
static void rcu_eqs_exit_common(long long oldval, int user)
{
RCU_TRACE(struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);)
rcu_dynticks_task_exit();
rcu_dynticks_eqs_exit();
rcu_cleanup_after_idle();
trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting);
if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
!user && !is_idle_task(current)) {
struct task_struct *idle __maybe_unused =
idle_task(smp_processor_id());
trace_rcu_dyntick(TPS("Error on exit: not idle task"),
oldval, rdtp->dynticks_nesting);
rcu_ftrace_dump(DUMP_ORIG);
WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
current->pid, current->comm,
idle->pid, idle->comm); /* must be idle task! */
}
}
/*
* Exit an RCU extended quiescent state, which can be either the
* idle loop or adaptive-tickless usermode execution.
*
* We crowbar the ->dynticks_nmi_nesting field to DYNTICK_IRQ_NONIDLE to
* allow for the possibility of usermode upcalls messing up our count of
* interrupt nesting level during the busy period that is just now starting.
*/
static void rcu_eqs_exit(bool user)
{
struct rcu_dynticks *rdtp;
long long oldval;
long oldval;
lockdep_assert_irqs_disabled();
rdtp = this_cpu_ptr(&rcu_dynticks);
oldval = rdtp->dynticks_nesting;
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0);
if (oldval & DYNTICK_TASK_NEST_MASK) {
rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
} else {
__this_cpu_inc(disable_rcu_irq_enter);
rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
rcu_eqs_exit_common(oldval, user);
__this_cpu_dec(disable_rcu_irq_enter);
if (oldval) {
rdtp->dynticks_nesting++;
return;
}
rcu_dynticks_task_exit();
rcu_dynticks_eqs_exit();
rcu_cleanup_after_idle();
trace_rcu_dyntick(TPS("End"), rdtp->dynticks_nesting, 1, rdtp->dynticks);
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
WRITE_ONCE(rdtp->dynticks_nesting, 1);
WRITE_ONCE(rdtp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE);
}
/**
@@ -979,11 +942,6 @@ static void rcu_eqs_exit(bool user)
* Exit idle mode, in other words, -enter- the mode in which RCU
* read-side critical sections can occur.
*
* We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NEST to
* allow for the possibility of usermode upcalls messing up our count
* of interrupt nesting level during the busy period that is just
* now starting.
*
* If you add or remove a call to rcu_idle_exit(), be sure to test with
* CONFIG_RCU_EQS_DEBUG=y.
*/
@@ -1012,65 +970,6 @@ void rcu_user_exit(void)
}
#endif /* CONFIG_NO_HZ_FULL */
/**
* rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
*
* Enter an interrupt handler, which might possibly result in exiting
* idle mode, in other words, entering the mode in which read-side critical
* sections can occur. The caller must have disabled interrupts.
*
* Note that the Linux kernel is fully capable of entering an interrupt
* handler that it never exits, for example when doing upcalls to
* user mode! This code assumes that the idle loop never does upcalls to
* user mode. If your architecture does do upcalls from the idle loop (or
* does anything else that results in unbalanced calls to the irq_enter()
* and irq_exit() functions), RCU will give you what you deserve, good
* and hard. But very infrequently and irreproducibly.
*
* Use things like work queues to work around this limitation.
*
* You have been warned.
*
* If you add or remove a call to rcu_irq_enter(), be sure to test with
* CONFIG_RCU_EQS_DEBUG=y.
*/
void rcu_irq_enter(void)
{
struct rcu_dynticks *rdtp;
long long oldval;
lockdep_assert_irqs_disabled();
rdtp = this_cpu_ptr(&rcu_dynticks);
/* Page faults can happen in NMI handlers, so check... */
if (rdtp->dynticks_nmi_nesting)
return;
oldval = rdtp->dynticks_nesting;
rdtp->dynticks_nesting++;
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
rdtp->dynticks_nesting == 0);
if (oldval)
trace_rcu_dyntick(TPS("++="), oldval, rdtp->dynticks_nesting);
else
rcu_eqs_exit_common(oldval, true);
}
/*
* Wrapper for rcu_irq_enter() where interrupts are enabled.
*
* If you add or remove a call to rcu_irq_enter_irqson(), be sure to test
* with CONFIG_RCU_EQS_DEBUG=y.
*/
void rcu_irq_enter_irqson(void)
{
unsigned long flags;
local_irq_save(flags);
rcu_irq_enter();
local_irq_restore(flags);
}
/**
* rcu_nmi_enter - inform RCU of entry to NMI context
*
@@ -1086,7 +985,7 @@ void rcu_irq_enter_irqson(void)
void rcu_nmi_enter(void)
{
struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
int incby = 2;
long incby = 2;
/* Complain about underflow. */
WARN_ON_ONCE(rdtp->dynticks_nmi_nesting < 0);
@@ -1103,45 +1002,61 @@ void rcu_nmi_enter(void)
rcu_dynticks_eqs_exit();
incby = 1;
}
rdtp->dynticks_nmi_nesting += incby;
trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="),
rdtp->dynticks_nmi_nesting,
rdtp->dynticks_nmi_nesting + incby, rdtp->dynticks);
WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* Prevent store tearing. */
rdtp->dynticks_nmi_nesting + incby);
barrier();
}
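To make the incby logic above and the matching rcu_nmi_exit() decrement concrete, here is an illustrative trace of ->dynticks_nmi_nesting (the scenario is invented for illustration; the arithmetic follows the code shown):

	CPU idle                            ->dynticks_nmi_nesting == 0
	NMI or irq arrives from idle        rcu_nmi_enter(): exits EQS, incby = 1   -> 1
	nested NMI interrupts that handler  rcu_nmi_enter(): incby = 2              -> 3
	nested NMI returns                  rcu_nmi_exit(): not outermost, -= 2     -> 1
	outermost handler returns           rcu_nmi_exit(): crowbar to 0, enter EQS -> 0

The low-order bit therefore records whether the outermost handler interrupted an RCU-idle context, which is what tells rcu_nmi_exit() to re-enter the extended quiescent state.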
/**
* rcu_nmi_exit - inform RCU of exit from NMI context
* rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
*
* If we are returning from the outermost NMI handler that interrupted an
* RCU-idle period, update rdtp->dynticks and rdtp->dynticks_nmi_nesting
* to let the RCU grace-period handling know that the CPU is back to
* being RCU-idle.
* Enter an interrupt handler, which might possibly result in exiting
* idle mode, in other words, entering the mode in which read-side critical
* sections can occur. The caller must have disabled interrupts.
*
* If you add or remove a call to rcu_nmi_exit(), be sure to test
* with CONFIG_RCU_EQS_DEBUG=y.
* Note that the Linux kernel is fully capable of entering an interrupt
* handler that it never exits, for example when doing upcalls to user mode!
* This code assumes that the idle loop never does upcalls to user mode.
* If your architecture's idle loop does do upcalls to user mode (or does
* anything else that results in unbalanced calls to the irq_enter() and
* irq_exit() functions), RCU will give you what you deserve, good and hard.
* But very infrequently and irreproducibly.
*
* Use things like work queues to work around this limitation.
*
* You have been warned.
*
* If you add or remove a call to rcu_irq_enter(), be sure to test with
* CONFIG_RCU_EQS_DEBUG=y.
*/
void rcu_nmi_exit(void)
void rcu_irq_enter(void)
{
struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
/*
* Check for ->dynticks_nmi_nesting underflow and bad ->dynticks.
* (We are exiting an NMI handler, so RCU better be paying attention
* to us!)
*/
WARN_ON_ONCE(rdtp->dynticks_nmi_nesting <= 0);
WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs());
lockdep_assert_irqs_disabled();
if (rdtp->dynticks_nmi_nesting == 0)
rcu_dynticks_task_exit();
rcu_nmi_enter();
if (rdtp->dynticks_nmi_nesting == 1)
rcu_cleanup_after_idle();
}
/*
* If the nesting level is not 1, the CPU wasn't RCU-idle, so
* leave it in non-RCU-idle state.
*/
if (rdtp->dynticks_nmi_nesting != 1) {
rdtp->dynticks_nmi_nesting -= 2;
return;
}
/*
* Wrapper for rcu_irq_enter() where interrupts are enabled.
*
* If you add or remove a call to rcu_irq_enter_irqson(), be sure to test
* with CONFIG_RCU_EQS_DEBUG=y.
*/
void rcu_irq_enter_irqson(void)
{
unsigned long flags;
/* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
rdtp->dynticks_nmi_nesting = 0;
rcu_dynticks_eqs_enter();
local_irq_save(flags);
rcu_irq_enter();
local_irq_restore(flags);
}
/**
@@ -1233,7 +1148,8 @@ EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
*/
static int rcu_is_cpu_rrupt_from_idle(void)
{
return __this_cpu_read(rcu_dynticks.dynticks_nesting) <= 1;
return __this_cpu_read(rcu_dynticks.dynticks_nesting) <= 0 &&
__this_cpu_read(rcu_dynticks.dynticks_nmi_nesting) <= 1;
}
/*
@@ -2789,6 +2705,11 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
rdp->n_force_qs_snap = rsp->n_force_qs;
} else if (count < rdp->qlen_last_fqs_check - qhimark)
rdp->qlen_last_fqs_check = count;
/*
* The following usually indicates a double call_rcu(). To track
* this down, try building with CONFIG_DEBUG_OBJECTS_RCU_HEAD=y.
*/
WARN_ON_ONCE(rcu_segcblist_empty(&rdp->cblist) != (count == 0));
local_irq_restore(flags);
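The new WARN_ON_ONCE() fires when the segmented callback list's emptiness disagrees with its count, which, as the comment says, usually means the same rcu_head was passed to call_rcu() twice. A hedged sketch of that bug pattern (my_obj, my_free_cb, and buggy_release are hypothetical), which CONFIG_DEBUG_OBJECTS_RCU_HEAD=y is designed to pinpoint:

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_obj {
	struct rcu_head rh;
	int data;
};

static void my_free_cb(struct rcu_head *rhp)
{
	kfree(container_of(rhp, struct my_obj, rh));
}

static void buggy_release(struct my_obj *p)
{
	call_rcu(&p->rh, my_free_cb);
	/*
	 * BUG: the same rcu_head is enqueued again before the first
	 * grace period completes, corrupting the per-CPU callback list
	 * and skewing its count.
	 */
	call_rcu(&p->rh, my_free_cb);
}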
@@ -3723,7 +3644,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
raw_spin_lock_irqsave_rcu_node(rnp, flags);
rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu);
rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != 1);
WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp->dynticks)));
rdp->cpu = cpu;
rdp->rsp = rsp;
@@ -3752,7 +3673,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
if (rcu_segcblist_empty(&rdp->cblist) && /* No early-boot CBs? */
!init_nocb_callback_list(rdp))
rcu_segcblist_init(&rdp->cblist); /* Re-enable callbacks. */
rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
rdp->dynticks->dynticks_nesting = 1; /* CPU not up, no tearing. */
rcu_dynticks_eqs_online();
raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */

View File

@@ -38,9 +38,8 @@
* Dynticks per-CPU state.
*/
struct rcu_dynticks {
long long dynticks_nesting; /* Track irq/process nesting level. */
/* Process level is worth LLONG_MAX/2. */
int dynticks_nmi_nesting; /* Track NMI nesting level. */
long dynticks_nesting; /* Track process nesting level. */
long dynticks_nmi_nesting; /* Track irq/NMI nesting level. */
atomic_t dynticks; /* Even value for idle, else odd. */
bool rcu_need_heavy_qs; /* GP old, need heavy quiescent state. */
unsigned long rcu_qs_ctr; /* Light universal quiescent state ctr. */

View File

@@ -61,7 +61,6 @@ DEFINE_PER_CPU(char, rcu_cpu_has_work);
#ifdef CONFIG_RCU_NOCB_CPU
static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */
static bool __read_mostly rcu_nocb_poll; /* Offload kthreads are to poll. */
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
@@ -1687,7 +1686,7 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
}
print_cpu_stall_fast_no_hz(fast_no_hz, cpu);
delta = rdp->mynode->gpnum - rdp->rcu_iw_gpnum;
pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u fqs=%ld %s\n",
pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%ld softirq=%u/%u fqs=%ld %s\n",
cpu,
"O."[!!cpu_online(cpu)],
"o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)],
@@ -1752,7 +1751,6 @@ static void increment_cpu_stall_ticks(void)
static int __init rcu_nocb_setup(char *str)
{
alloc_bootmem_cpumask_var(&rcu_nocb_mask);
have_rcu_nocb_mask = true;
cpulist_parse(str, rcu_nocb_mask);
return 1;
}
@@ -1801,7 +1799,7 @@ static void rcu_init_one_nocb(struct rcu_node *rnp)
/* Is the specified CPU a no-CBs CPU? */
bool rcu_is_nocb_cpu(int cpu)
{
if (have_rcu_nocb_mask)
if (cpumask_available(rcu_nocb_mask))
return cpumask_test_cpu(cpu, rcu_nocb_mask);
return false;
}
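Dropping have_rcu_nocb_mask works because cpumask_available() already answers the same question: with CONFIG_CPUMASK_OFFSTACK=n a cpumask_var_t is a plain array and the test is always true, and with CONFIG_CPUMASK_OFFSTACK=y it is a pointer that stays NULL until alloc_bootmem_cpumask_var() or zalloc_cpumask_var() succeeds. A minimal sketch of the resulting pattern, with a hypothetical mask:

static cpumask_var_t example_mask;	/* hypothetical, mirrors rcu_nocb_mask */

static bool example_cpu_selected(int cpu)
{
	if (cpumask_available(example_mask))	/* no separate bool flag needed */
		return cpumask_test_cpu(cpu, example_mask);
	return false;				/* never allocated, or allocation failed */
}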
@@ -2295,14 +2293,13 @@ void __init rcu_init_nohz(void)
need_rcu_nocb_mask = true;
#endif /* #if defined(CONFIG_NO_HZ_FULL) */
if (!have_rcu_nocb_mask && need_rcu_nocb_mask) {
if (!cpumask_available(rcu_nocb_mask) && need_rcu_nocb_mask) {
if (!zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL)) {
pr_info("rcu_nocb_mask allocation failed, callback offloading disabled.\n");
return;
}
have_rcu_nocb_mask = true;
}
if (!have_rcu_nocb_mask)
if (!cpumask_available(rcu_nocb_mask))
return;
#if defined(CONFIG_NO_HZ_FULL)
@@ -2428,7 +2425,7 @@ static void __init rcu_organize_nocb_kthreads(struct rcu_state *rsp)
struct rcu_data *rdp_leader = NULL; /* Suppress misguided gcc warn. */
struct rcu_data *rdp_prev = NULL;
if (!have_rcu_nocb_mask)
if (!cpumask_available(rcu_nocb_mask))
return;
if (ls == -1) {
ls = int_sqrt(nr_cpu_ids);