rcu: Define RCU-sched API in terms of RCU for Tree RCU PREEMPT builds
Now that RCU-preempt knows about preemption disabling, its implementation
of synchronize_rcu() works for synchronize_sched(), and likewise for the
other RCU-sched update-side API members. This commit therefore confines
the RCU-sched update-side code to CONFIG_PREEMPT=n builds, and defines
RCU-sched's update-side API members in terms of those of RCU-preempt.
This means that any given build of the Linux kernel has only one
update-side flavor of RCU, namely RCU-preempt for CONFIG_PREEMPT=y builds
and RCU-sched for CONFIG_PREEMPT=n builds. This in turn means that
kernels built with CONFIG_RCU_NOCB_CPU=y have only one rcuo kthread per
CPU.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Andi Kleen <ak@linux.intel.com>
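In effect, once preemption-disabled regions count as RCU read-side critical sections, each RCU-sched update-side member can simply forward to its RCU counterpart on CONFIG_PREEMPT=y builds. The sketch below illustrates that mapping only; the static-inline wrapper form and its placement are assumptions for illustration, not part of this patch.

/* Illustrative sketch only -- not taken from this patch. */
#include <linux/rcupdate.h>

static inline void synchronize_sched(void)
{
	synchronize_rcu();	/* now also waits for preempt-disabled regions */
}

static inline void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
{
	call_rcu(head, func);	/* single callback flavor, single set of rcuo kthreads */
}

static inline void rcu_barrier_sched(void)
{
	rcu_barrier();		/* waits for the one remaining flavor's callbacks */
}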
@@ -123,10 +123,6 @@ static void __init rcu_bootup_announce_oddness(void)
 
 #ifdef CONFIG_PREEMPT_RCU
 
-RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu);
-static struct rcu_state *const rcu_state_p = &rcu_preempt_state;
-static struct rcu_data __percpu *const rcu_data_p = &rcu_preempt_data;
-
 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
 			       bool wake);
 static void rcu_read_unlock_special(struct task_struct *t);
@@ -303,15 +299,15 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
  *
  * Callers to this function must disable preemption.
  */
-static void rcu_preempt_qs(void)
+static void rcu_qs(void)
 {
-	RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_qs() invoked with preemption enabled!!!\n");
+	RCU_LOCKDEP_WARN(preemptible(), "rcu_qs() invoked with preemption enabled!!!\n");
 	if (__this_cpu_read(rcu_data_p->cpu_no_qs.s)) {
 		trace_rcu_grace_period(TPS("rcu_preempt"),
 				       __this_cpu_read(rcu_data_p->gp_seq),
 				       TPS("cpuqs"));
 		__this_cpu_write(rcu_data_p->cpu_no_qs.b.norm, false);
-		barrier(); /* Coordinate with rcu_preempt_check_callbacks(). */
+		barrier(); /* Coordinate with rcu_flavor_check_callbacks(). */
 		current->rcu_read_unlock_special.b.need_qs = false;
 	}
 }
@@ -329,12 +325,14 @@ static void rcu_preempt_qs(void)
  *
  * Caller must disable interrupts.
  */
-static void rcu_preempt_note_context_switch(bool preempt)
+void rcu_note_context_switch(bool preempt)
 {
 	struct task_struct *t = current;
 	struct rcu_data *rdp = this_cpu_ptr(rcu_state_p->rda);
 	struct rcu_node *rnp;
 
+	barrier(); /* Avoid RCU read-side critical sections leaking down. */
+	trace_rcu_utilization(TPS("Start context switch"));
 	lockdep_assert_irqs_disabled();
 	WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0);
 	if (t->rcu_read_lock_nesting > 0 &&
@@ -381,10 +379,13 @@ static void rcu_preempt_note_context_switch(bool preempt)
 	 * grace period, then the fact that the task has been enqueued
 	 * means that we continue to block the current grace period.
 	 */
-	rcu_preempt_qs();
+	rcu_qs();
 	if (rdp->deferred_qs)
 		rcu_report_exp_rdp(rcu_state_p, rdp);
+	trace_rcu_utilization(TPS("End context switch"));
+	barrier(); /* Avoid RCU read-side critical sections leaking up. */
 }
+EXPORT_SYMBOL_GPL(rcu_note_context_switch);
 
 /*
  * Check for preempted RCU readers blocking the current grace period
@@ -493,7 +494,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
 		return;
 	}
 	if (special.b.need_qs) {
-		rcu_preempt_qs();
+		rcu_qs();
 		t->rcu_read_unlock_special.b.need_qs = false;
 		if (!t->rcu_read_unlock_special.s && !rdp->deferred_qs) {
 			local_irq_restore(flags);
@@ -596,7 +597,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
  */
 static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
 {
-	return (this_cpu_ptr(&rcu_preempt_data)->deferred_qs ||
+	return (this_cpu_ptr(&rcu_data)->deferred_qs ||
 		READ_ONCE(t->rcu_read_unlock_special.s)) &&
 	       t->rcu_read_lock_nesting <= 0;
 }
@@ -781,11 +782,14 @@ rcu_preempt_check_blocked_tasks(struct rcu_state *rsp, struct rcu_node *rnp)
  *
  * Caller must disable hard irqs.
  */
-static void rcu_preempt_check_callbacks(void)
+static void rcu_flavor_check_callbacks(int user)
 {
-	struct rcu_state *rsp = &rcu_preempt_state;
+	struct rcu_state *rsp = &rcu_state;
 	struct task_struct *t = current;
 
+	if (user || rcu_is_cpu_rrupt_from_idle()) {
+		rcu_note_voluntary_context_switch(current);
+	}
 	if (t->rcu_read_lock_nesting > 0 ||
 	    (preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK))) {
 		/* No QS, force context switch if deferred. */
@@ -795,7 +799,7 @@ static void rcu_preempt_check_callbacks(void)
 		rcu_preempt_deferred_qs(t); /* Report deferred QS. */
 		return;
 	} else if (!t->rcu_read_lock_nesting) {
-		rcu_preempt_qs(); /* Report immediate QS. */
+		rcu_qs(); /* Report immediate QS. */
 		return;
 	}
 
@@ -808,44 +812,6 @@ static void rcu_preempt_check_callbacks(void)
 		t->rcu_read_unlock_special.b.need_qs = true;
 }
 
-/**
- * call_rcu() - Queue an RCU callback for invocation after a grace period.
- * @head: structure to be used for queueing the RCU updates.
- * @func: actual callback function to be invoked after the grace period
- *
- * The callback function will be invoked some time after a full grace
- * period elapses, in other words after all pre-existing RCU read-side
- * critical sections have completed. However, the callback function
- * might well execute concurrently with RCU read-side critical sections
- * that started after call_rcu() was invoked. RCU read-side critical
- * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
- * and may be nested.
- *
- * Note that all CPUs must agree that the grace period extended beyond
- * all pre-existing RCU read-side critical section. On systems with more
- * than one CPU, this means that when "func()" is invoked, each CPU is
- * guaranteed to have executed a full memory barrier since the end of its
- * last RCU read-side critical section whose beginning preceded the call
- * to call_rcu(). It also means that each CPU executing an RCU read-side
- * critical section that continues beyond the start of "func()" must have
- * executed a memory barrier after the call_rcu() but before the beginning
- * of that RCU read-side critical section. Note that these guarantees
- * include CPUs that are offline, idle, or executing in user mode, as
- * well as CPUs that are executing in the kernel.
- *
- * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
- * resulting RCU callback function "func()", then both CPU A and CPU B are
- * guaranteed to execute a full memory barrier during the time interval
- * between the call to call_rcu() and the invocation of "func()" -- even
- * if CPU A and CPU B are the same CPU (but again only if the system has
- * more than one CPU).
- */
-void call_rcu(struct rcu_head *head, rcu_callback_t func)
-{
-	__call_rcu(head, func, rcu_state_p, -1, 0);
-}
-EXPORT_SYMBOL_GPL(call_rcu);
-
 /**
  * synchronize_rcu - wait until a grace period has elapsed.
  *
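For reference, a caller of the call_rcu() API whose per-flavor copy is removed above looks roughly like the sketch below; struct foo, foo_reclaim(), and foo_release() are hypothetical names used only for illustration.

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {			/* hypothetical RCU-protected object */
	int val;
	struct rcu_head rh;
};

/* Runs after a grace period: no pre-existing reader can still see the object. */
static void foo_reclaim(struct rcu_head *rhp)
{
	kfree(container_of(rhp, struct foo, rh));
}

/* Update side: unlink the object elsewhere, then defer the free. */
static void foo_release(struct foo *fp)
{
	call_rcu(&fp->rh, foo_reclaim);
}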
@@ -856,14 +822,28 @@ EXPORT_SYMBOL_GPL(call_rcu);
  * concurrently with new RCU read-side critical sections that began while
  * synchronize_rcu() was waiting. RCU read-side critical sections are
  * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested.
+ * In addition, regions of code across which interrupts, preemption, or
+ * softirqs have been disabled also serve as RCU read-side critical
+ * sections. This includes hardware interrupt handlers, softirq handlers,
+ * and NMI handlers.
  *
- * See the description of synchronize_sched() for more detailed
- * information on memory-ordering guarantees. However, please note
- * that -only- the memory-ordering guarantees apply. For example,
- * synchronize_rcu() is -not- guaranteed to wait on things like code
- * protected by preempt_disable(), instead, synchronize_rcu() is -only-
- * guaranteed to wait on RCU read-side critical sections, that is, sections
- * of code protected by rcu_read_lock().
+ * Note that this guarantee implies further memory-ordering guarantees.
+ * On systems with more than one CPU, when synchronize_rcu() returns,
+ * each CPU is guaranteed to have executed a full memory barrier since the
+ * end of its last RCU-sched read-side critical section whose beginning
+ * preceded the call to synchronize_rcu(). In addition, each CPU having
+ * an RCU read-side critical section that extends beyond the return from
+ * synchronize_rcu() is guaranteed to have executed a full memory barrier
+ * after the beginning of synchronize_rcu() and before the beginning of
+ * that RCU read-side critical section. Note that these guarantees include
+ * CPUs that are offline, idle, or executing in user mode, as well as CPUs
+ * that are executing in the kernel.
+ *
+ * Furthermore, if CPU A invoked synchronize_rcu(), which returned
+ * to its caller on CPU B, then both CPU A and CPU B are guaranteed
+ * to have executed a full memory barrier during the execution of
+ * synchronize_rcu() -- even if CPU A and CPU B are the same CPU (but
+ * again only if the system has more than one CPU).
  */
 void synchronize_rcu(void)
 {
@@ -880,28 +860,6 @@ void synchronize_rcu(void)
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu);
 
-/**
- * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
- *
- * Note that this primitive does not necessarily wait for an RCU grace period
- * to complete. For example, if there are no RCU callbacks queued anywhere
- * in the system, then rcu_barrier() is within its rights to return
- * immediately, without waiting for anything, much less an RCU grace period.
- */
-void rcu_barrier(void)
-{
-	_rcu_barrier(rcu_state_p);
-}
-EXPORT_SYMBOL_GPL(rcu_barrier);
-
-/*
- * Initialize preemptible RCU's state structures.
- */
-static void __init __rcu_init_preempt(void)
-{
-	rcu_init_one(rcu_state_p);
-}
-
 /*
  * Check for a task exiting while in a preemptible-RCU read-side
  * critical section, clean up if so. No need to issue warnings,
@@ -964,8 +922,6 @@ dump_blkd_tasks(struct rcu_state *rsp, struct rcu_node *rnp, int ncheck)
 
 #else /* #ifdef CONFIG_PREEMPT_RCU */
 
-static struct rcu_state *const rcu_state_p = &rcu_sched_state;
-
 /*
  * Tell them what RCU they are running.
  */
@@ -975,18 +931,48 @@ static void __init rcu_bootup_announce(void)
 	rcu_bootup_announce_oddness();
 }
 
-/* Because preemptible RCU does not exist, we can ignore its QSes. */
-static void rcu_preempt_qs(void)
+/*
+ * Note a quiescent state for PREEMPT=n. Because we do not need to know
+ * how many quiescent states passed, just if there was at least one since
+ * the start of the grace period, this just sets a flag. The caller must
+ * have disabled preemption.
+ */
+static void rcu_qs(void)
 {
+	RCU_LOCKDEP_WARN(preemptible(), "rcu_qs() invoked with preemption enabled!!!");
+	if (!__this_cpu_read(rcu_data.cpu_no_qs.s))
+		return;
+	trace_rcu_grace_period(TPS("rcu_sched"),
+			       __this_cpu_read(rcu_data.gp_seq), TPS("cpuqs"));
+	__this_cpu_write(rcu_data.cpu_no_qs.b.norm, false);
+	if (!__this_cpu_read(rcu_data.cpu_no_qs.b.exp))
+		return;
+	__this_cpu_write(rcu_data.cpu_no_qs.b.exp, false);
+	rcu_report_exp_rdp(&rcu_state, this_cpu_ptr(&rcu_data));
 }
 
 /*
- * Because preemptible RCU does not exist, we never have to check for
- * CPUs being in quiescent states.
+ * Note a PREEMPT=n context switch. The caller must have disabled interrupts.
  */
-static void rcu_preempt_note_context_switch(bool preempt)
+void rcu_note_context_switch(bool preempt)
 {
+	barrier(); /* Avoid RCU read-side critical sections leaking down. */
+	trace_rcu_utilization(TPS("Start context switch"));
+	rcu_qs();
+	/* Load rcu_urgent_qs before other flags. */
+	if (!smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs)))
+		goto out;
+	this_cpu_write(rcu_dynticks.rcu_urgent_qs, false);
+	if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs)))
+		rcu_momentary_dyntick_idle();
+	this_cpu_inc(rcu_dynticks.rcu_qs_ctr);
+	if (!preempt)
+		rcu_tasks_qs(current);
+out:
+	trace_rcu_utilization(TPS("End context switch"));
+	barrier(); /* Avoid RCU read-side critical sections leaking up. */
 }
+EXPORT_SYMBOL_GPL(rcu_note_context_switch);
 
 /*
  * Because preemptible RCU does not exist, there are never any preempted
@@ -1054,29 +1040,48 @@ rcu_preempt_check_blocked_tasks(struct rcu_state *rsp, struct rcu_node *rnp)
 }
 
 /*
- * Because preemptible RCU does not exist, it never has any callbacks
- * to check.
+ * Check to see if this CPU is in a non-context-switch quiescent state
+ * (user mode or idle loop for rcu, non-softirq execution for rcu_bh).
+ * Also schedule RCU core processing.
+ *
+ * This function must be called from hardirq context. It is normally
+ * invoked from the scheduling-clock interrupt.
  */
-static void rcu_preempt_check_callbacks(void)
+static void rcu_flavor_check_callbacks(int user)
 {
+	if (user || rcu_is_cpu_rrupt_from_idle()) {
+
+		/*
+		 * Get here if this CPU took its interrupt from user
+		 * mode or from the idle loop, and if this is not a
+		 * nested interrupt. In this case, the CPU is in
+		 * a quiescent state, so note it.
+		 *
+		 * No memory barrier is required here because rcu_qs()
+		 * references only CPU-local variables that other CPUs
+		 * neither access nor modify, at least not while the
+		 * corresponding CPU is online.
+		 */
+
+		rcu_qs();
+	}
 }
 
-/*
- * Because preemptible RCU does not exist, rcu_barrier() is just
- * another name for rcu_barrier_sched().
- */
-void rcu_barrier(void)
-{
-	rcu_barrier_sched();
-}
-EXPORT_SYMBOL_GPL(rcu_barrier);
-
-/*
- * Because preemptible RCU does not exist, it need not be initialized.
- */
-static void __init __rcu_init_preempt(void)
+/* PREEMPT=n implementation of synchronize_rcu(). */
+void synchronize_rcu(void)
 {
+	RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
+			 lock_is_held(&rcu_lock_map) ||
+			 lock_is_held(&rcu_sched_lock_map),
+			 "Illegal synchronize_rcu() in RCU-sched read-side critical section");
+	if (rcu_blocking_is_gp())
+		return;
+	if (rcu_gp_is_expedited())
+		synchronize_rcu_expedited();
+	else
+		wait_rcu_gp(call_rcu);
 }
+EXPORT_SYMBOL_GPL(synchronize_rcu);
 
 /*
  * Because preemptible RCU does not exist, tasks cannot possibly exit
@@ -1319,8 +1324,7 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
 
 static void rcu_kthread_do_work(void)
 {
-	rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data));
-	rcu_do_batch(&rcu_preempt_state, this_cpu_ptr(&rcu_preempt_data));
+	rcu_do_batch(&rcu_state, this_cpu_ptr(&rcu_data));
 }
 
 static void rcu_cpu_kthread_setup(unsigned int cpu)
@@ -1727,87 +1731,6 @@ static void rcu_idle_count_callbacks_posted(void)
 	__this_cpu_add(rcu_dynticks.nonlazy_posted, 1);
 }
 
-/*
- * Data for flushing lazy RCU callbacks at OOM time.
- */
-static atomic_t oom_callback_count;
-static DECLARE_WAIT_QUEUE_HEAD(oom_callback_wq);
-
-/*
- * RCU OOM callback -- decrement the outstanding count and deliver the
- * wake-up if we are the last one.
- */
-static void rcu_oom_callback(struct rcu_head *rhp)
-{
-	if (atomic_dec_and_test(&oom_callback_count))
-		wake_up(&oom_callback_wq);
-}
-
-/*
- * Post an rcu_oom_notify callback on the current CPU if it has at
- * least one lazy callback. This will unnecessarily post callbacks
- * to CPUs that already have a non-lazy callback at the end of their
- * callback list, but this is an infrequent operation, so accept some
- * extra overhead to keep things simple.
- */
-static void rcu_oom_notify_cpu(void *unused)
-{
-	struct rcu_state *rsp;
-	struct rcu_data *rdp;
-
-	for_each_rcu_flavor(rsp) {
-		rdp = raw_cpu_ptr(rsp->rda);
-		if (rcu_segcblist_n_lazy_cbs(&rdp->cblist)) {
-			atomic_inc(&oom_callback_count);
-			rsp->call(&rdp->oom_head, rcu_oom_callback);
-		}
-	}
-}
-
-/*
- * If low on memory, ensure that each CPU has a non-lazy callback.
- * This will wake up CPUs that have only lazy callbacks, in turn
- * ensuring that they free up the corresponding memory in a timely manner.
- * Because an uncertain amount of memory will be freed in some uncertain
- * timeframe, we do not claim to have freed anything.
- */
-static int rcu_oom_notify(struct notifier_block *self,
-			  unsigned long notused, void *nfreed)
-{
-	int cpu;
-
-	/* Wait for callbacks from earlier instance to complete. */
-	wait_event(oom_callback_wq, atomic_read(&oom_callback_count) == 0);
-	smp_mb(); /* Ensure callback reuse happens after callback invocation. */
-
-	/*
-	 * Prevent premature wakeup: ensure that all increments happen
-	 * before there is a chance of the counter reaching zero.
-	 */
-	atomic_set(&oom_callback_count, 1);
-
-	for_each_online_cpu(cpu) {
-		smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1);
-		cond_resched_tasks_rcu_qs();
-	}
-
-	/* Unconditionally decrement: no need to wake ourselves up. */
-	atomic_dec(&oom_callback_count);
-
-	return NOTIFY_OK;
-}
-
-static struct notifier_block rcu_oom_nb = {
-	.notifier_call = rcu_oom_notify
-};
-
-static int __init rcu_register_oom_notifier(void)
-{
-	register_oom_notifier(&rcu_oom_nb);
-	return 0;
-}
-early_initcall(rcu_register_oom_notifier);
-
 #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
 
 #ifdef CONFIG_RCU_FAST_NO_HZ