Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RCU updates from Ingo Molnar:
 "The main changes in this cycle are mostly related to preparatory work
  for the full-dynticks work:

   - Remove restrictions on no-CBs CPUs, make RCU_FAST_NO_HZ take
     advantage of numbered callbacks, do callback accelerations based on
     numbered callbacks.  Posted to LKML at
        https://lkml.org/lkml/2013/3/18/960

   - RCU documentation updates.  Posted to LKML at
        https://lkml.org/lkml/2013/3/18/570

   - Miscellaneous fixes.  Posted to LKML at
        https://lkml.org/lkml/2013/3/18/594"

* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits)
  rcu: Make rcu_accelerate_cbs() note need for future grace periods
  rcu: Abstract rcu_start_future_gp() from rcu_nocb_wait_gp()
  rcu: Rename n_nocb_gp_requests to need_future_gp
  rcu: Push lock release to rcu_start_gp()'s callers
  rcu: Repurpose no-CBs event tracing to future-GP events
  rcu: Rearrange locking in rcu_start_gp()
  rcu: Make RCU_FAST_NO_HZ take advantage of numbered callbacks
  rcu: Accelerate RCU callbacks at grace-period end
  rcu: Export RCU_FAST_NO_HZ parameters to sysfs
  rcu: Distinguish "rcuo" kthreads by RCU flavor
  rcu: Add event tracing for no-CBs CPUs' grace periods
  rcu: Add event tracing for no-CBs CPUs' callback registration
  rcu: Introduce proper blocking to no-CBs kthreads GP waits
  rcu: Provide compile-time control for no-CBs CPUs
  rcu: Tone down debugging during boot-up and shutdown.
  rcu: Add softirq-stall indications to stall-warning messages
  rcu: Documentation update
  rcu: Make bugginess of code sample more evident
  rcu: Fix hlist_bl_set_first_rcu() annotation
  rcu: Delete unused rcu_node "wakemask" field
  ...
This commit is contained in:
Linus Torvalds
2013-04-30 07:39:01 -07:00
15 changed files with 650 additions and 528 deletions

View File

@@ -64,7 +64,7 @@
static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
#define RCU_STATE_INITIALIZER(sname, cr) { \
#define RCU_STATE_INITIALIZER(sname, sabbr, cr) { \
.level = { &sname##_state.node[0] }, \
.call = cr, \
.fqs_state = RCU_GP_IDLE, \
@@ -76,13 +76,14 @@ static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
.onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \
.name = #sname, \
.abbr = sabbr, \
}
struct rcu_state rcu_sched_state =
RCU_STATE_INITIALIZER(rcu_sched, call_rcu_sched);
RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, call_rcu_bh);
struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh);
DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
static struct rcu_state *rcu_state;
@@ -223,6 +224,8 @@ static ulong jiffies_till_next_fqs = RCU_JIFFIES_TILL_FORCE_QS;
module_param(jiffies_till_first_fqs, ulong, 0644);
module_param(jiffies_till_next_fqs, ulong, 0644);
static void rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
struct rcu_data *rdp);
static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *));
static void force_quiescent_state(struct rcu_state *rsp);
static int rcu_pending(int cpu);
@@ -310,6 +313,8 @@ cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
if (rcu_gp_in_progress(rsp))
return 0; /* No, a grace period is already in progress. */
if (rcu_nocb_needs_gp(rsp))
return 1; /* Yes, a no-CBs CPU needs one. */
if (!rdp->nxttail[RCU_NEXT_TAIL])
return 0; /* No, this is a no-CBs (or offline) CPU. */
if (*rdp->nxttail[RCU_NEXT_READY_TAIL])
@@ -1035,10 +1040,11 @@ static void init_callback_list(struct rcu_data *rdp)
{
int i;
if (init_nocb_callback_list(rdp))
return;
rdp->nxtlist = NULL;
for (i = 0; i < RCU_NEXT_SIZE; i++)
rdp->nxttail[i] = &rdp->nxtlist;
init_nocb_callback_list(rdp);
}
/*
@@ -1070,6 +1076,120 @@ static unsigned long rcu_cbs_completed(struct rcu_state *rsp,
return rnp->completed + 2;
}
/*
* Trace-event helper function for rcu_start_future_gp() and
* rcu_nocb_wait_gp().
*
* Emits a single rcu_future_grace_period trace event built from:
*   - the name of the rcu_state structure that rdp belongs to,
*   - the ->gpnum and ->completed values of the rcu_node structure rnp,
*   - "c", the grace-period number the caller is interested in,
*   - rnp's ->level, ->grplo and ->grphi fields,
*   - "s", a caller-supplied string identifying the event.
*
* This function only reads its arguments; it modifies no RCU state.
*/
static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
unsigned long c, char *s)
{
trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum,
rnp->completed, c, rnp->level,
rnp->grplo, rnp->grphi, s);
}
/*
 * Start some future grace period, as needed to handle newly arrived
 * callbacks.  The required future grace periods are recorded in each
 * rcu_node structure's ->need_future_gp field.
 *
 * The caller must hold the specified rcu_node structure's ->lock.
 * If rnp is not the root rcu_node structure, the root's ->lock is
 * acquired and released internally.
 *
 * Returns the grace-period number that was requested (or that was found
 * to have been requested already).
 */
static unsigned long __maybe_unused
rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
{
	unsigned long c;
	int i;
	struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);

	/*
	 * Pick up grace-period number for new callbacks.  If this
	 * grace period is already marked as needed, return to the caller.
	 */
	c = rcu_cbs_completed(rdp->rsp, rnp);
	trace_rcu_future_gp(rnp, rdp, c, "Startleaf");
	if (rnp->need_future_gp[c & 0x1]) {
		trace_rcu_future_gp(rnp, rdp, c, "Prestartleaf");
		return c;
	}

	/*
	 * If either this rcu_node structure or the root rcu_node structure
	 * believe that a grace period is in progress, then we must wait
	 * for the one following, which is in "c".  Because our request
	 * will be noticed at the end of the current grace period, we don't
	 * need to explicitly start one.
	 *
	 * The root's fields are sampled with ACCESS_ONCE() because the
	 * root's ->lock is not necessarily held at this point.  The
	 * second test must look at rnp_root: testing rnp twice would make
	 * it redundant with the first test and would ignore the root's
	 * view entirely.
	 */
	if (rnp->gpnum != rnp->completed ||
	    ACCESS_ONCE(rnp_root->gpnum) != ACCESS_ONCE(rnp_root->completed)) {
		rnp->need_future_gp[c & 0x1]++;
		trace_rcu_future_gp(rnp, rdp, c, "Startedleaf");
		return c;
	}

	/*
	 * There might be no grace period in progress.  If we don't already
	 * hold it, acquire the root rcu_node structure's lock in order to
	 * start one (if needed).
	 */
	if (rnp != rnp_root)
		raw_spin_lock(&rnp_root->lock);

	/*
	 * Get a new grace-period number.  If there really is no grace
	 * period in progress, it will be smaller than the one we obtained
	 * earlier.  Adjust callbacks as needed.  Note that even no-CBs
	 * CPUs have a ->nxtcompleted[] array, so no no-CBs checks needed.
	 */
	c = rcu_cbs_completed(rdp->rsp, rnp_root);
	for (i = RCU_DONE_TAIL; i < RCU_NEXT_TAIL; i++)
		if (ULONG_CMP_LT(c, rdp->nxtcompleted[i]))
			rdp->nxtcompleted[i] = c;

	/*
	 * If the need for the required grace period is already
	 * recorded, trace and leave.
	 */
	if (rnp_root->need_future_gp[c & 0x1]) {
		trace_rcu_future_gp(rnp, rdp, c, "Prestartedroot");
		goto unlock_out;
	}

	/* Record the need for the future grace period. */
	rnp_root->need_future_gp[c & 0x1]++;

	/* If a grace period is not already in progress, start one. */
	if (rnp_root->gpnum != rnp_root->completed) {
		trace_rcu_future_gp(rnp, rdp, c, "Startedleafroot");
	} else {
		trace_rcu_future_gp(rnp, rdp, c, "Startedroot");
		rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp);
	}
unlock_out:
	/* Drop the root lock only if it was taken above. */
	if (rnp != rnp_root)
		raw_spin_unlock(&rnp_root->lock);
	return c;
}
/*
 * Retire the future-grace-period request that corresponds to the grace
 * period that just ended on this rcu_node structure, and report whether
 * a request for the following grace period is already recorded.
 *
 * rcu_nocb_gp_cleanup() is invoked first so that any no-callbacks
 * kthreads waiting for this grace period to complete are awakened.
 *
 * Returns the (nonzero if any) count of requests recorded for the next
 * grace period.
 */
static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
{
	struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
	int gp_done = rnp->completed;
	int more_wanted;

	rcu_nocb_gp_cleanup(rsp, rnp);

	/* The slot for the just-completed grace period is now stale. */
	rnp->need_future_gp[gp_done & 0x1] = 0;

	/* The other slot holds requests for the grace period after that. */
	more_wanted = rnp->need_future_gp[(gp_done + 1) & 0x1];

	if (more_wanted)
		trace_rcu_future_gp(rnp, rdp, gp_done, "CleanupMore");
	else
		trace_rcu_future_gp(rnp, rdp, gp_done, "Cleanup");

	return more_wanted;
}
/*
* If there is room, assign a ->completed number to any callbacks on
* this CPU that have not already been assigned. Also accelerate any
@@ -1129,6 +1249,8 @@ static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
rdp->nxttail[i] = rdp->nxttail[RCU_NEXT_TAIL];
rdp->nxtcompleted[i] = c;
}
/* Record any needed additional grace periods. */
rcu_start_future_gp(rnp, rdp);
/* Trace depending on how much we were able to accelerate. */
if (!*rdp->nxttail[RCU_WAIT_TAIL])
@@ -1308,9 +1430,9 @@ static int rcu_gp_init(struct rcu_state *rsp)
rdp = this_cpu_ptr(rsp->rda);
rcu_preempt_check_blocked_tasks(rnp);
rnp->qsmask = rnp->qsmaskinit;
rnp->gpnum = rsp->gpnum;
ACCESS_ONCE(rnp->gpnum) = rsp->gpnum;
WARN_ON_ONCE(rnp->completed != rsp->completed);
rnp->completed = rsp->completed;
ACCESS_ONCE(rnp->completed) = rsp->completed;
if (rnp == rdp->mynode)
rcu_start_gp_per_cpu(rsp, rnp, rdp);
rcu_preempt_boost_start_gp(rnp);
@@ -1319,7 +1441,8 @@ static int rcu_gp_init(struct rcu_state *rsp)
rnp->grphi, rnp->qsmask);
raw_spin_unlock_irq(&rnp->lock);
#ifdef CONFIG_PROVE_RCU_DELAY
if ((prandom_u32() % (rcu_num_nodes * 8)) == 0)
if ((prandom_u32() % (rcu_num_nodes * 8)) == 0 &&
system_state == SYSTEM_RUNNING)
schedule_timeout_uninterruptible(2);
#endif /* #ifdef CONFIG_PROVE_RCU_DELAY */
cond_resched();
@@ -1361,6 +1484,7 @@ int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
static void rcu_gp_cleanup(struct rcu_state *rsp)
{
unsigned long gp_duration;
int nocb = 0;
struct rcu_data *rdp;
struct rcu_node *rnp = rcu_get_root(rsp);
@@ -1390,17 +1514,23 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
*/
rcu_for_each_node_breadth_first(rsp, rnp) {
raw_spin_lock_irq(&rnp->lock);
rnp->completed = rsp->gpnum;
ACCESS_ONCE(rnp->completed) = rsp->gpnum;
rdp = this_cpu_ptr(rsp->rda);
if (rnp == rdp->mynode)
__rcu_process_gp_end(rsp, rnp, rdp);
nocb += rcu_future_gp_cleanup(rsp, rnp);
raw_spin_unlock_irq(&rnp->lock);
cond_resched();
}
rnp = rcu_get_root(rsp);
raw_spin_lock_irq(&rnp->lock);
rcu_nocb_gp_set(rnp, nocb);
rsp->completed = rsp->gpnum; /* Declare grace period done. */
trace_rcu_grace_period(rsp->name, rsp->completed, "end");
rsp->fqs_state = RCU_GP_IDLE;
rdp = this_cpu_ptr(rsp->rda);
rcu_advance_cbs(rsp, rnp, rdp); /* Reduce false positives below. */
if (cpu_needs_another_gp(rsp, rdp))
rsp->gp_flags = 1;
raw_spin_unlock_irq(&rnp->lock);
@@ -1476,57 +1606,62 @@ static int __noreturn rcu_gp_kthread(void *arg)
/*
* Start a new RCU grace period if warranted, re-initializing the hierarchy
* in preparation for detecting the next grace period. The caller must hold
* the root node's ->lock, which is released before return. Hard irqs must
* be disabled.
* the root node's ->lock and hard irqs must be disabled.
*
* Note that it is legal for a dying CPU (which is marked as offline) to
* invoke this function. This can happen when the dying CPU reports its
* quiescent state.
*/
static void
rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
__releases(rcu_get_root(rsp)->lock)
rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
struct rcu_data *rdp)
{
struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
struct rcu_node *rnp = rcu_get_root(rsp);
if (!rsp->gp_kthread ||
!cpu_needs_another_gp(rsp, rdp)) {
if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) {
/*
* Either we have not yet spawned the grace-period
* task, this CPU does not need another grace period,
* or a grace period is already in progress.
* Either way, don't start a new grace period.
*/
raw_spin_unlock_irqrestore(&rnp->lock, flags);
return;
}
/*
* Because there is no grace period in progress right now,
* any callbacks we have up to this point will be satisfied
* by the next grace period. So this is a good place to
* assign a grace period number to recently posted callbacks.
*/
rcu_accelerate_cbs(rsp, rnp, rdp);
rsp->gp_flags = RCU_GP_FLAG_INIT;
raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */
/* Ensure that CPU is aware of completion of last grace period. */
rcu_process_gp_end(rsp, rdp);
local_irq_restore(flags);
/* Wake up rcu_gp_kthread() to start the grace period. */
wake_up(&rsp->gp_wq);
}
/*
 * Wrapper around rcu_start_gp_advanced() that first advances the calling
 * CPU's callbacks against the root rcu_node structure.
 *
 * The advancing step lives here rather than in rcu_start_gp_advanced()
 * because rcu_start_gp_advanced() is itself reachable from
 * rcu_advance_cbs(); doing the advance there would recurse without end
 * (or rather would self-deadlock before getting that far).
 */
static void
rcu_start_gp(struct rcu_state *rsp)
{
	struct rcu_node *rnp_root = rcu_get_root(rsp);
	struct rcu_data *this_rdp = this_cpu_ptr(rsp->rda);

	/*
	 * With no grace period currently in progress, every callback
	 * queued so far will be covered by the next one.  Advancing
	 * first also makes cpu_needs_another_gp() less likely to report
	 * a false positive and thereby trigger a pointless grace period.
	 */
	rcu_advance_cbs(rsp, rnp_root, this_rdp);

	/* Now start the grace period, if one is still warranted. */
	rcu_start_gp_advanced(rsp, rnp_root, this_rdp);
}
/*
* Report a full set of quiescent states to the specified rcu_state
* data structure. This involves cleaning up after the prior grace
* period and letting rcu_start_gp() start up the next grace period
* if one is needed. Note that the caller must hold rnp->lock, as
* required by rcu_start_gp(), which will release it.
* if one is needed. Note that the caller must hold rnp->lock, which
* is released before return.
*/
static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
__releases(rcu_get_root(rsp)->lock)
@@ -2124,7 +2259,8 @@ __rcu_process_callbacks(struct rcu_state *rsp)
local_irq_save(flags);
if (cpu_needs_another_gp(rsp, rdp)) {
raw_spin_lock(&rcu_get_root(rsp)->lock); /* irqs disabled. */
rcu_start_gp(rsp, flags); /* releases above lock */
rcu_start_gp(rsp);
raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
} else {
local_irq_restore(flags);
}
@@ -2169,7 +2305,8 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
static void invoke_rcu_core(void)
{
raise_softirq(RCU_SOFTIRQ);
if (cpu_online(smp_processor_id()))
raise_softirq(RCU_SOFTIRQ);
}
/*
@@ -2204,11 +2341,11 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
/* Start a new grace period if one not already started. */
if (!rcu_gp_in_progress(rsp)) {
unsigned long nestflag;
struct rcu_node *rnp_root = rcu_get_root(rsp);
raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
rcu_start_gp(rsp, nestflag); /* rlses rnp_root->lock */
raw_spin_lock(&rnp_root->lock);
rcu_start_gp(rsp);
raw_spin_unlock(&rnp_root->lock);
} else {
/* Give the grace period a kick. */
rdp->blimit = LONG_MAX;
@@ -2628,19 +2765,27 @@ static int rcu_pending(int cpu)
}
/*
* Check to see if any future RCU-related work will need to be done
* by the current CPU, even if none need be done immediately, returning
* 1 if so.
* Return true if the specified CPU has any callback. If all_lazy is
* non-NULL, store an indication of whether all callbacks are lazy.
* (If there are no callbacks, all of them are deemed to be lazy.)
*/
static int rcu_cpu_has_callbacks(int cpu)
static int rcu_cpu_has_callbacks(int cpu, bool *all_lazy)
{
bool al = true;
bool hc = false;
struct rcu_data *rdp;
struct rcu_state *rsp;
/* RCU callbacks either ready or pending? */
for_each_rcu_flavor(rsp)
if (per_cpu_ptr(rsp->rda, cpu)->nxtlist)
return 1;
return 0;
for_each_rcu_flavor(rsp) {
rdp = per_cpu_ptr(rsp->rda, cpu);
if (rdp->qlen != rdp->qlen_lazy)
al = false;
if (rdp->nxtlist)
hc = true;
}
if (all_lazy)
*all_lazy = al;
return hc;
}
/*
@@ -2859,7 +3004,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
atomic_set(&rdp->dynticks->dynticks,
(atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
rcu_prepare_for_idle_init(cpu);
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
/* Add CPU to rcu_node bitmasks. */
@@ -2909,7 +3053,6 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
struct rcu_node *rnp = rdp->mynode;
struct rcu_state *rsp;
int ret = NOTIFY_OK;
trace_rcu_utilization("Start CPU hotplug");
switch (action) {
@@ -2923,21 +3066,12 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
rcu_boost_kthread_setaffinity(rnp, -1);
break;
case CPU_DOWN_PREPARE:
if (nocb_cpu_expendable(cpu))
rcu_boost_kthread_setaffinity(rnp, cpu);
else
ret = NOTIFY_BAD;
rcu_boost_kthread_setaffinity(rnp, cpu);
break;
case CPU_DYING:
case CPU_DYING_FROZEN:
/*
* The whole machine is "stopped" except this CPU, so we can
* touch any data without introducing corruption. We send the
* dying CPU's callbacks to an arbitrarily chosen online CPU.
*/
for_each_rcu_flavor(rsp)
rcu_cleanup_dying_cpu(rsp);
rcu_cleanup_after_idle(cpu);
break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
@@ -2950,7 +3084,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
break;
}
trace_rcu_utilization("End CPU hotplug");
return ret;
return NOTIFY_OK;
}
/*
@@ -3085,6 +3219,7 @@ static void __init rcu_init_one(struct rcu_state *rsp,
}
rnp->level = i;
INIT_LIST_HEAD(&rnp->blkd_tasks);
rcu_init_one_nocb(rnp);
}
}
@@ -3170,8 +3305,7 @@ void __init rcu_init(void)
rcu_init_one(&rcu_sched_state, &rcu_sched_data);
rcu_init_one(&rcu_bh_state, &rcu_bh_data);
__rcu_init_preempt();
rcu_init_nocb();
open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
/*
* We don't need protection against CPU-hotplug here because

View File

@@ -88,18 +88,13 @@ struct rcu_dynticks {
int dynticks_nmi_nesting; /* Track NMI nesting level. */
atomic_t dynticks; /* Even value for idle, else odd. */
#ifdef CONFIG_RCU_FAST_NO_HZ
int dyntick_drain; /* Prepare-for-idle state variable. */
unsigned long dyntick_holdoff;
/* No retries for the jiffy of failure. */
struct timer_list idle_gp_timer;
/* Wake up CPU sleeping with callbacks. */
unsigned long idle_gp_timer_expires;
/* When to wake up CPU (for repost). */
bool idle_first_pass; /* First pass of attempt to go idle? */
bool all_lazy; /* Are all CPU's CBs lazy? */
unsigned long nonlazy_posted;
/* # times non-lazy CBs posted to CPU. */
unsigned long nonlazy_posted_snap;
/* idle-period nonlazy_posted snapshot. */
unsigned long last_accelerate;
/* Last jiffy CBs were accelerated. */
int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */
#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
};
@@ -134,9 +129,6 @@ struct rcu_node {
/* elements that need to drain to allow the */
/* current expedited grace period to */
/* complete (only for TREE_PREEMPT_RCU). */
atomic_t wakemask; /* CPUs whose kthread needs to be awakened. */
/* Since this has meaning only for leaf */
/* rcu_node structures, 32 bits suffices. */
unsigned long qsmaskinit;
/* Per-GP initial value for qsmask & expmask. */
unsigned long grpmask; /* Mask to apply to parent qsmask. */
@@ -196,6 +188,12 @@ struct rcu_node {
/* Refused to boost: not sure why, though. */
/* This can happen due to race conditions. */
#endif /* #ifdef CONFIG_RCU_BOOST */
#ifdef CONFIG_RCU_NOCB_CPU
wait_queue_head_t nocb_gp_wq[2];
/* Place for rcu_nocb_kthread() to wait GP. */
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
int need_future_gp[2];
/* Counts of upcoming no-CB GP requests. */
raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp;
} ____cacheline_internodealigned_in_smp;
@@ -328,6 +326,11 @@ struct rcu_data {
struct task_struct *nocb_kthread;
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
/* 8) RCU CPU stall data. */
#ifdef CONFIG_RCU_CPU_STALL_INFO
unsigned int softirq_snap; /* Snapshot of softirq activity. */
#endif /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
int cpu;
struct rcu_state *rsp;
};
@@ -375,12 +378,6 @@ struct rcu_state {
struct rcu_data __percpu *rda; /* pointer of percu rcu_data. */
void (*call)(struct rcu_head *head, /* call_rcu() flavor. */
void (*func)(struct rcu_head *head));
#ifdef CONFIG_RCU_NOCB_CPU
void (*call_remote)(struct rcu_head *head,
void (*func)(struct rcu_head *head));
/* call_rcu() flavor, but for */
/* placing on remote CPU. */
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
/* The following fields are guarded by the root rcu_node's lock. */
@@ -443,6 +440,7 @@ struct rcu_state {
unsigned long gp_max; /* Maximum GP duration in */
/* jiffies. */
char *name; /* Name of structure. */
char abbr; /* Abbreviated name. */
struct list_head flavors; /* List of RCU flavors. */
};
@@ -520,7 +518,6 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
struct rcu_node *rnp);
#endif /* #ifdef CONFIG_RCU_BOOST */
static void __cpuinit rcu_prepare_kthreads(int cpu);
static void rcu_prepare_for_idle_init(int cpu);
static void rcu_cleanup_after_idle(int cpu);
static void rcu_prepare_for_idle(int cpu);
static void rcu_idle_count_callbacks_posted(void);
@@ -529,16 +526,18 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu);
static void print_cpu_stall_info_end(void);
static void zero_cpu_stall_ticks(struct rcu_data *rdp);
static void increment_cpu_stall_ticks(void);
static int rcu_nocb_needs_gp(struct rcu_state *rsp);
static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq);
static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp);
static void rcu_init_one_nocb(struct rcu_node *rnp);
static bool is_nocb_cpu(int cpu);
static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
bool lazy);
static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
struct rcu_data *rdp);
static bool nocb_cpu_expendable(int cpu);
static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp);
static void init_nocb_callback_list(struct rcu_data *rdp);
static void __init rcu_init_nocb(void);
static bool init_nocb_callback_list(struct rcu_data *rdp);
#endif /* #ifndef RCU_TREE_NONCORE */

View File

@@ -85,11 +85,21 @@ static void __init rcu_bootup_announce_oddness(void)
if (nr_cpu_ids != NR_CPUS)
printk(KERN_INFO "\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids);
#ifdef CONFIG_RCU_NOCB_CPU
#ifndef CONFIG_RCU_NOCB_CPU_NONE
if (!have_rcu_nocb_mask) {
alloc_bootmem_cpumask_var(&rcu_nocb_mask);
have_rcu_nocb_mask = true;
}
#ifdef CONFIG_RCU_NOCB_CPU_ZERO
pr_info("\tExperimental no-CBs CPU 0\n");
cpumask_set_cpu(0, rcu_nocb_mask);
#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */
#ifdef CONFIG_RCU_NOCB_CPU_ALL
pr_info("\tExperimental no-CBs for all CPUs\n");
cpumask_setall(rcu_nocb_mask);
#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */
#endif /* #ifndef CONFIG_RCU_NOCB_CPU_NONE */
if (have_rcu_nocb_mask) {
if (cpumask_test_cpu(0, rcu_nocb_mask)) {
cpumask_clear_cpu(0, rcu_nocb_mask);
pr_info("\tCPU 0: illegal no-CBs CPU (cleared).\n");
}
cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask);
pr_info("\tExperimental no-CBs CPUs: %s.\n", nocb_buf);
if (rcu_nocb_poll)
@@ -101,7 +111,7 @@ static void __init rcu_bootup_announce_oddness(void)
#ifdef CONFIG_TREE_PREEMPT_RCU
struct rcu_state rcu_preempt_state =
RCU_STATE_INITIALIZER(rcu_preempt, call_rcu);
RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu);
DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
static struct rcu_state *rcu_state = &rcu_preempt_state;
@@ -1533,14 +1543,7 @@ static void __cpuinit rcu_prepare_kthreads(int cpu)
int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
{
*delta_jiffies = ULONG_MAX;
return rcu_cpu_has_callbacks(cpu);
}
/*
* Because we do not have RCU_FAST_NO_HZ, don't bother initializing for it.
*/
static void rcu_prepare_for_idle_init(int cpu)
{
return rcu_cpu_has_callbacks(cpu, NULL);
}
/*
@@ -1577,16 +1580,6 @@ static void rcu_idle_count_callbacks_posted(void)
*
* The following three preprocessor symbols control this state machine:
*
* RCU_IDLE_FLUSHES gives the maximum number of times that we will attempt
* to satisfy RCU. Beyond this point, it is better to incur a periodic
* scheduling-clock interrupt than to loop through the state machine
* at full power.
* RCU_IDLE_OPT_FLUSHES gives the number of RCU_IDLE_FLUSHES that are
* optional if RCU does not need anything immediately from this
* CPU, even if this CPU still has RCU callbacks queued. The first
* times through the state machine are mandatory: we need to give
* the state machine a chance to communicate a quiescent state
* to the RCU core.
* RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted
* to sleep in dyntick-idle mode with RCU callbacks pending. This
* is sized to be roughly one RCU grace period. Those energy-efficiency
@@ -1602,186 +1595,108 @@ static void rcu_idle_count_callbacks_posted(void)
* adjustment, they can be converted into kernel config parameters, though
* making the state machine smarter might be a better option.
*/
#define RCU_IDLE_FLUSHES 5 /* Number of dyntick-idle tries. */
#define RCU_IDLE_OPT_FLUSHES 3 /* Optional dyntick-idle tries. */
#define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */
#define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */
static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY;
module_param(rcu_idle_gp_delay, int, 0644);
static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY;
module_param(rcu_idle_lazy_gp_delay, int, 0644);
extern int tick_nohz_enabled;
/*
* Does the specified flavor of RCU have non-lazy callbacks pending on
* the specified CPU? Both RCU flavor and CPU are specified by the
* rcu_data structure.
* Try to advance callbacks for all flavors of RCU on the current CPU.
* Afterwards, if there are any callbacks ready for immediate invocation,
* return true.
*/
static bool __rcu_cpu_has_nonlazy_callbacks(struct rcu_data *rdp)
static bool rcu_try_advance_all_cbs(void)
{
return rdp->qlen != rdp->qlen_lazy;
}
bool cbs_ready = false;
struct rcu_data *rdp;
struct rcu_node *rnp;
struct rcu_state *rsp;
#ifdef CONFIG_TREE_PREEMPT_RCU
for_each_rcu_flavor(rsp) {
rdp = this_cpu_ptr(rsp->rda);
rnp = rdp->mynode;
/*
* Are there non-lazy RCU-preempt callbacks? (There cannot be if there
* is no RCU-preempt in the kernel.)
*/
static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu)
{
struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
/*
* Don't bother checking unless a grace period has
* completed since we last checked and there are
* callbacks not yet ready to invoke.
*/
if (rdp->completed != rnp->completed &&
rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL])
rcu_process_gp_end(rsp, rdp);
return __rcu_cpu_has_nonlazy_callbacks(rdp);
}
#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu)
{
return 0;
}
#endif /* else #ifdef CONFIG_TREE_PREEMPT_RCU */
/*
* Does any flavor of RCU have non-lazy callbacks on the specified CPU?
*/
static bool rcu_cpu_has_nonlazy_callbacks(int cpu)
{
return __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_sched_data, cpu)) ||
__rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_bh_data, cpu)) ||
rcu_preempt_cpu_has_nonlazy_callbacks(cpu);
if (cpu_has_callbacks_ready_to_invoke(rdp))
cbs_ready = true;
}
return cbs_ready;
}
/*
* Allow the CPU to enter dyntick-idle mode if either: (1) There are no
* callbacks on this CPU, (2) this CPU has not yet attempted to enter
* dyntick-idle mode, or (3) this CPU is in the process of attempting to
* enter dyntick-idle mode. Otherwise, if we have recently tried and failed
* to enter dyntick-idle mode, we refuse to try to enter it. After all,
* it is better to incur scheduling-clock interrupts than to spin
* continuously for the same time duration!
* Allow the CPU to enter dyntick-idle mode unless it has callbacks ready
* to invoke. If the CPU has callbacks, try to advance them. Tell the
* caller to set the timeout based on whether or not there are non-lazy
* callbacks.
*
* The delta_jiffies argument is used to store the time when RCU is
* going to need the CPU again if it still has callbacks. The reason
* for this is that rcu_prepare_for_idle() might need to post a timer,
* but if so, it will do so after tick_nohz_stop_sched_tick() has set
* the wakeup time for this CPU. This means that RCU's timer can be
* delayed until the wakeup time, which defeats the purpose of posting
* a timer.
* The caller must have disabled interrupts.
*/
int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
int rcu_needs_cpu(int cpu, unsigned long *dj)
{
struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
/* Flag a new idle sojourn to the idle-entry state machine. */
rdtp->idle_first_pass = 1;
/* Snapshot to detect later posting of non-lazy callback. */
rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;
/* If no callbacks, RCU doesn't need the CPU. */
if (!rcu_cpu_has_callbacks(cpu)) {
*delta_jiffies = ULONG_MAX;
if (!rcu_cpu_has_callbacks(cpu, &rdtp->all_lazy)) {
*dj = ULONG_MAX;
return 0;
}
if (rdtp->dyntick_holdoff == jiffies) {
/* RCU recently tried and failed, so don't try again. */
*delta_jiffies = 1;
/* Attempt to advance callbacks. */
if (rcu_try_advance_all_cbs()) {
/* Some ready to invoke, so initiate later invocation. */
invoke_rcu_core();
return 1;
}
/* Set up for the possibility that RCU will post a timer. */
if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
*delta_jiffies = round_up(RCU_IDLE_GP_DELAY + jiffies,
RCU_IDLE_GP_DELAY) - jiffies;
rdtp->last_accelerate = jiffies;
/* Request timer delay depending on laziness, and round. */
if (rdtp->all_lazy) {
*dj = round_up(rcu_idle_gp_delay + jiffies,
rcu_idle_gp_delay) - jiffies;
} else {
*delta_jiffies = jiffies + RCU_IDLE_LAZY_GP_DELAY;
*delta_jiffies = round_jiffies(*delta_jiffies) - jiffies;
*dj = round_jiffies(rcu_idle_lazy_gp_delay + jiffies) - jiffies;
}
return 0;
}
/*
* Handler for smp_call_function_single(). The only point of this
* handler is to wake the CPU up, so the handler does only tracing.
*/
void rcu_idle_demigrate(void *unused)
{
trace_rcu_prep_idle("Demigrate");
}
/*
* Timer handler used to force CPU to start pushing its remaining RCU
* callbacks in the case where it entered dyntick-idle mode with callbacks
* pending. The handler doesn't really need to do anything because the
* real work is done upon re-entry to idle, or by the next scheduling-clock
* interrupt should idle not be re-entered.
*
* One special case: the timer gets migrated without awakening the CPU
* on which the timer was scheduled on. In this case, we must wake up
* that CPU. We do so with smp_call_function_single().
*/
static void rcu_idle_gp_timer_func(unsigned long cpu_in)
{
int cpu = (int)cpu_in;
trace_rcu_prep_idle("Timer");
if (cpu != smp_processor_id())
smp_call_function_single(cpu, rcu_idle_demigrate, NULL, 0);
else
WARN_ON_ONCE(1); /* Getting here can hang the system... */
}
/*
* Initialize the timer used to pull CPUs out of dyntick-idle mode.
*/
static void rcu_prepare_for_idle_init(int cpu)
{
struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
rdtp->dyntick_holdoff = jiffies - 1;
setup_timer(&rdtp->idle_gp_timer, rcu_idle_gp_timer_func, cpu);
rdtp->idle_gp_timer_expires = jiffies - 1;
rdtp->idle_first_pass = 1;
}
/*
* Clean up for exit from idle. Because we are exiting from idle, there
* is no longer any point to ->idle_gp_timer, so cancel it. This will
* do nothing if this timer is not active, so just cancel it unconditionally.
*/
static void rcu_cleanup_after_idle(int cpu)
{
struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
del_timer(&rdtp->idle_gp_timer);
trace_rcu_prep_idle("Cleanup after idle");
rdtp->tick_nohz_enabled_snap = ACCESS_ONCE(tick_nohz_enabled);
}
/*
* Check to see if any RCU-related work can be done by the current CPU,
* and if so, schedule a softirq to get it done. This function is part
* of the RCU implementation; it is -not- an exported member of the RCU API.
*
* The idea is for the current CPU to clear out all work required by the
* RCU core for the current grace period, so that this CPU can be permitted
* to enter dyntick-idle mode. In some cases, it will need to be awakened
* at the end of the grace period by whatever CPU ends the grace period.
* This allows CPUs to go dyntick-idle more quickly, and to reduce the
* number of wakeups by a modest integer factor.
*
* Because it is not legal to invoke rcu_process_callbacks() with irqs
* disabled, we do one pass of force_quiescent_state(), then do a
* invoke_rcu_core() to cause rcu_process_callbacks() to be invoked
* later. The ->dyntick_drain field controls the sequencing.
* Prepare a CPU for idle from an RCU perspective. The first major task
* is to sense whether nohz mode has been enabled or disabled via sysfs.
* The second major task is to check to see if a non-lazy callback has
* arrived at a CPU that previously had only lazy callbacks. The third
* major task is to accelerate (that is, assign grace-period numbers to)
* any recently arrived callbacks.
*
* The caller must have disabled interrupts.
*/
static void rcu_prepare_for_idle(int cpu)
{
struct timer_list *tp;
struct rcu_data *rdp;
struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
struct rcu_node *rnp;
struct rcu_state *rsp;
int tne;
/* Handle nohz enablement switches conservatively. */
tne = ACCESS_ONCE(tick_nohz_enabled);
if (tne != rdtp->tick_nohz_enabled_snap) {
if (rcu_cpu_has_callbacks(cpu))
if (rcu_cpu_has_callbacks(cpu, NULL))
invoke_rcu_core(); /* force nohz to see update. */
rdtp->tick_nohz_enabled_snap = tne;
return;
@@ -1789,125 +1704,56 @@ static void rcu_prepare_for_idle(int cpu)
if (!tne)
return;
/* Adaptive-tick mode, where usermode execution is idle to RCU. */
if (!is_idle_task(current)) {
rdtp->dyntick_holdoff = jiffies - 1;
if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
trace_rcu_prep_idle("User dyntick with callbacks");
rdtp->idle_gp_timer_expires =
round_up(jiffies + RCU_IDLE_GP_DELAY,
RCU_IDLE_GP_DELAY);
} else if (rcu_cpu_has_callbacks(cpu)) {
rdtp->idle_gp_timer_expires =
round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY);
trace_rcu_prep_idle("User dyntick with lazy callbacks");
} else {
return;
}
tp = &rdtp->idle_gp_timer;
mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
/* If this is a no-CBs CPU, no callbacks, just return. */
if (is_nocb_cpu(cpu))
return;
}
/*
* If this is an idle re-entry, for example, due to use of
* RCU_NONIDLE() or the new idle-loop tracing API within the idle
* loop, then don't take any state-machine actions, unless the
* momentary exit from idle queued additional non-lazy callbacks.
* Instead, repost the ->idle_gp_timer if this CPU has callbacks
* pending.
* If a non-lazy callback arrived at a CPU having only lazy
* callbacks, invoke RCU core for the side-effect of recalculating
* idle duration on re-entry to idle.
*/
if (!rdtp->idle_first_pass &&
(rdtp->nonlazy_posted == rdtp->nonlazy_posted_snap)) {
if (rcu_cpu_has_callbacks(cpu)) {
tp = &rdtp->idle_gp_timer;
mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
}
return;
}
rdtp->idle_first_pass = 0;
rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted - 1;
/*
* If there are no callbacks on this CPU, enter dyntick-idle mode.
* Also reset state to avoid prejudicing later attempts.
*/
if (!rcu_cpu_has_callbacks(cpu)) {
rdtp->dyntick_holdoff = jiffies - 1;
rdtp->dyntick_drain = 0;
trace_rcu_prep_idle("No callbacks");
return;
}
/*
* If in holdoff mode, just return. We will presumably have
* refrained from disabling the scheduling-clock tick.
*/
if (rdtp->dyntick_holdoff == jiffies) {
trace_rcu_prep_idle("In holdoff");
return;
}
/* Check and update the ->dyntick_drain sequencing. */
if (rdtp->dyntick_drain <= 0) {
/* First time through, initialize the counter. */
rdtp->dyntick_drain = RCU_IDLE_FLUSHES;
} else if (rdtp->dyntick_drain <= RCU_IDLE_OPT_FLUSHES &&
!rcu_pending(cpu) &&
!local_softirq_pending()) {
/* Can we go dyntick-idle despite still having callbacks? */
rdtp->dyntick_drain = 0;
rdtp->dyntick_holdoff = jiffies;
if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
trace_rcu_prep_idle("Dyntick with callbacks");
rdtp->idle_gp_timer_expires =
round_up(jiffies + RCU_IDLE_GP_DELAY,
RCU_IDLE_GP_DELAY);
} else {
rdtp->idle_gp_timer_expires =
round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY);
trace_rcu_prep_idle("Dyntick with lazy callbacks");
}
tp = &rdtp->idle_gp_timer;
mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;
return; /* Nothing more to do immediately. */
} else if (--(rdtp->dyntick_drain) <= 0) {
/* We have hit the limit, so time to give up. */
rdtp->dyntick_holdoff = jiffies;
trace_rcu_prep_idle("Begin holdoff");
invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */
return;
}
/*
* Do one step of pushing the remaining RCU callbacks through
* the RCU core state machine.
*/
#ifdef CONFIG_TREE_PREEMPT_RCU
if (per_cpu(rcu_preempt_data, cpu).nxtlist) {
rcu_preempt_qs(cpu);
force_quiescent_state(&rcu_preempt_state);
}
#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
if (per_cpu(rcu_sched_data, cpu).nxtlist) {
rcu_sched_qs(cpu);
force_quiescent_state(&rcu_sched_state);
}
if (per_cpu(rcu_bh_data, cpu).nxtlist) {
rcu_bh_qs(cpu);
force_quiescent_state(&rcu_bh_state);
}
/*
* If RCU callbacks are still pending, RCU still needs this CPU.
* So try forcing the callbacks through the grace period.
*/
if (rcu_cpu_has_callbacks(cpu)) {
trace_rcu_prep_idle("More callbacks");
if (rdtp->all_lazy &&
rdtp->nonlazy_posted != rdtp->nonlazy_posted_snap) {
invoke_rcu_core();
} else {
trace_rcu_prep_idle("Callbacks drained");
return;
}
/*
* If we have not yet accelerated this jiffy, accelerate all
* callbacks on this CPU.
*/
if (rdtp->last_accelerate == jiffies)
return;
rdtp->last_accelerate = jiffies;
for_each_rcu_flavor(rsp) {
rdp = per_cpu_ptr(rsp->rda, cpu);
if (!*rdp->nxttail[RCU_DONE_TAIL])
continue;
rnp = rdp->mynode;
raw_spin_lock(&rnp->lock); /* irqs already disabled. */
rcu_accelerate_cbs(rsp, rnp, rdp);
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
}
}
/*
 * Idle-exit cleanup.  Try to advance callbacks past any grace periods
 * that elapsed while the CPU was idle, then, for each RCU flavor whose
 * per-CPU data now has callbacks ready to invoke, initiate invocation
 * via invoke_rcu_core().  Nothing is done for a no-CBs CPU.
 */
static void rcu_cleanup_after_idle(int cpu)
{
	struct rcu_state *state;

	if (is_nocb_cpu(cpu))
		return;

	rcu_try_advance_all_cbs();
	for_each_rcu_flavor(state) {
		struct rcu_data *cpu_data = per_cpu_ptr(state->rda, cpu);

		if (!cpu_has_callbacks_ready_to_invoke(cpu_data))
			continue;
		invoke_rcu_core();
	}
}
@@ -2015,16 +1861,13 @@ early_initcall(rcu_register_oom_notifier);
static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
{
struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
struct timer_list *tltp = &rdtp->idle_gp_timer;
char c;
unsigned long nlpd = rdtp->nonlazy_posted - rdtp->nonlazy_posted_snap;
c = rdtp->dyntick_holdoff == jiffies ? 'H' : '.';
if (timer_pending(tltp))
sprintf(cp, "drain=%d %c timer=%lu",
rdtp->dyntick_drain, c, tltp->expires - jiffies);
else
sprintf(cp, "drain=%d %c timer not pending",
rdtp->dyntick_drain, c);
sprintf(cp, "last_accelerate: %04lx/%04lx, nonlazy_posted: %ld, %c%c",
rdtp->last_accelerate & 0xffff, jiffies & 0xffff,
ulong2long(nlpd),
rdtp->all_lazy ? 'L' : '.',
rdtp->tick_nohz_enabled_snap ? '.' : 'D');
}
#else /* #ifdef CONFIG_RCU_FAST_NO_HZ */
@@ -2070,10 +1913,11 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
ticks_value = rsp->gpnum - rdp->gpnum;
}
print_cpu_stall_fast_no_hz(fast_no_hz, cpu);
printk(KERN_ERR "\t%d: (%lu %s) idle=%03x/%llx/%d %s\n",
printk(KERN_ERR "\t%d: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u %s\n",
cpu, ticks_value, ticks_title,
atomic_read(&rdtp->dynticks) & 0xfff,
rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting,
rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu),
fast_no_hz);
}
@@ -2087,6 +1931,7 @@ static void print_cpu_stall_info_end(void)
/*
 * Reset the stall-warning bookkeeping held in @rdp: zero ->ticks_this_gp
 * and record the running CPU's current RCU_SOFTIRQ count in
 * ->softirq_snap.
 */
static void zero_cpu_stall_ticks(struct rcu_data *rdp)
{
	int this_cpu = smp_processor_id();

	rdp->ticks_this_gp = 0;
	rdp->softirq_snap = kstat_softirqs_cpu(RCU_SOFTIRQ, this_cpu);
}
/* Increment ->ticks_this_gp for all flavors of RCU. */
@@ -2165,6 +2010,47 @@ static int __init parse_rcu_nocb_poll(char *arg)
}
early_param("rcu_nocb_poll", parse_rcu_nocb_poll);
/*
 * Do any no-CBs CPUs need another grace period?  The answer is the root
 * rcu_node structure's ->need_future_gp entry for the grace period
 * following the one recorded in ->completed.
 *
 * Interrupts must be disabled.  If the caller does not hold the root
 * rcu_node structure's ->lock, the result is advisory only.
 */
static int rcu_nocb_needs_gp(struct rcu_state *rsp)
{
	struct rcu_node *root = rcu_get_root(rsp);
	unsigned long next_gp = ACCESS_ONCE(root->completed) + 1;

	return root->need_future_gp[next_gp & 0x1];
}
/*
 * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended
 * grace period.  The wait queue is picked from @rnp's ->nocb_gp_wq[]
 * by the low-order bit of ->completed.  @rsp is not used here.
 */
static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
{
	unsigned long slot = rnp->completed & 0x1;

	wake_up_all(&rnp->nocb_gp_wq[slot]);
}
/*
 * Add @nrq to @rnp's ->need_future_gp entry for the grace period that
 * follows ->completed.
 *
 * This is how the root rcu_node structure's ->need_future_gp field is
 * set from the sum of those of all rcu_node structures.  Doing so
 * double-counts the root rcu_node structure's own requests, but that
 * is necessary to handle the possibility of an rcu_nocb_kthread()
 * having awakened while the rcu_node structures were being updated
 * for the end of the previous grace period.
 */
static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
{
	unsigned long slot = (rnp->completed + 1) & 0x1;

	rnp->need_future_gp[slot] += nrq;
}
/*
 * Initialize the two wait queues, ->nocb_gp_wq[0] and ->nocb_gp_wq[1],
 * of the specified rcu_node structure.
 */
static void rcu_init_one_nocb(struct rcu_node *rnp)
{
/* NOTE(review): kept as two separate calls rather than a loop; */
/* init_waitqueue_head() may be a macro with per-call-site state -- confirm. */
init_waitqueue_head(&rnp->nocb_gp_wq[0]);
init_waitqueue_head(&rnp->nocb_gp_wq[1]);
}
/* Is the specified CPU a no-CBs CPU? */
static bool is_nocb_cpu(int cpu)
{
@@ -2227,6 +2113,13 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
if (!is_nocb_cpu(rdp->cpu))
return 0;
__call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy);
if (__is_kfree_rcu_offset((unsigned long)rhp->func))
trace_rcu_kfree_callback(rdp->rsp->name, rhp,
(unsigned long)rhp->func,
rdp->qlen_lazy, rdp->qlen);
else
trace_rcu_callback(rdp->rsp->name, rhp,
rdp->qlen_lazy, rdp->qlen);
return 1;
}
@@ -2265,95 +2158,36 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
}
/*
* There must be at least one non-no-CBs CPU in operation at any given
* time, because no-CBs CPUs are not capable of initiating grace periods
* independently. This function therefore complains if the specified
* CPU is the last non-no-CBs CPU, allowing the CPU-hotplug system to
* avoid offlining the last such CPU. (Recursion is a wonderful thing,
* but you have to have a base case!)
* If necessary, kick off a new grace period, and either way wait
* for a subsequent grace period to complete.
*/
static bool nocb_cpu_expendable(int cpu)
static void rcu_nocb_wait_gp(struct rcu_data *rdp)
{
cpumask_var_t non_nocb_cpus;
int ret;
unsigned long c;
bool d;
unsigned long flags;
struct rcu_node *rnp = rdp->mynode;
raw_spin_lock_irqsave(&rnp->lock, flags);
c = rcu_start_future_gp(rnp, rdp);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
/*
* If there are no no-CB CPUs or if this CPU is not a no-CB CPU,
* then offlining this CPU is harmless. Let it happen.
* Wait for the grace period. Do so interruptibly to avoid messing
* up the load average.
*/
if (!have_rcu_nocb_mask || is_nocb_cpu(cpu))
return 1;
/* If no memory, play it safe and keep the CPU around. */
if (!alloc_cpumask_var(&non_nocb_cpus, GFP_NOIO))
return 0;
cpumask_andnot(non_nocb_cpus, cpu_online_mask, rcu_nocb_mask);
cpumask_clear_cpu(cpu, non_nocb_cpus);
ret = !cpumask_empty(non_nocb_cpus);
free_cpumask_var(non_nocb_cpus);
return ret;
}
/*
 * Helper structure for remote registry of RCU callbacks.
 * This is needed for when a no-CBs CPU needs to start a grace period.
 * If it just invokes call_rcu(), the resulting callback will be queued,
 * which can result in deadlock.
 *
 * call_rcu_local() consumes one of these by calling crf(rhp, func).
 */
struct rcu_head_remote {
struct rcu_head *rhp; /* rcu_head passed as first argument to ->crf */
call_rcu_func_t *crf; /* registration function to call */
void (*func)(struct rcu_head *rhp); /* callback passed as second argument to ->crf */
};
/*
 * Register the callback described by an rcu_head_remote structure by
 * calling its ->crf function with its ->rhp and ->func members.
 * Intended to be run via smp_call_function_single(), with @arg
 * pointing at the structure's ->rhp member.
 */
static void call_rcu_local(void *arg)
{
	struct rcu_head_remote *remote;

	remote = container_of(arg, struct rcu_head_remote, rhp);
	remote->crf(remote->rhp, remote->func);
}
/*
 * Set up an rcu_head_remote structure and then invoke call_rcu_local()
 * on CPU 0 (which is guaranteed to be a non-no-CBs CPU) via
 * smp_call_function_single().
 *
 * NOTE(review): the structure lives on this function's stack, so the
 * final argument of 1 is presumably the "wait for completion" flag --
 * confirm against smp_call_function_single().
 */
static void invoke_crf_remote(struct rcu_head *rhp,
			      void (*func)(struct rcu_head *rhp),
			      call_rcu_func_t crf)
{
	struct rcu_head_remote rhr = {
		.rhp = rhp,
		.crf = crf,
		.func = func,
	};

	smp_call_function_single(0, call_rcu_local, &rhr, 1);
}
/*
 * Helper functions to be passed to wait_rcu_gp(), each of which
 * invokes invoke_crf_remote() to register a callback appropriately.
 *
 * This variant registers @func on @rhp using call_rcu().
 */
static void __maybe_unused
call_rcu_preempt_remote(struct rcu_head *rhp,
void (*func)(struct rcu_head *rhp))
{
invoke_crf_remote(rhp, func, call_rcu);
}
/* Register @func on @rhp using call_rcu_bh(), by way of invoke_crf_remote(). */
static void call_rcu_bh_remote(struct rcu_head *rhp,
void (*func)(struct rcu_head *rhp))
{
invoke_crf_remote(rhp, func, call_rcu_bh);
}
static void call_rcu_sched_remote(struct rcu_head *rhp,
void (*func)(struct rcu_head *rhp))
{
invoke_crf_remote(rhp, func, call_rcu_sched);
trace_rcu_future_gp(rnp, rdp, c, "StartWait");
for (;;) {
wait_event_interruptible(
rnp->nocb_gp_wq[c & 0x1],
(d = ULONG_CMP_GE(ACCESS_ONCE(rnp->completed), c)));
if (likely(d))
break;
flush_signals(current);
trace_rcu_future_gp(rnp, rdp, c, "ResumeWait");
}
trace_rcu_future_gp(rnp, rdp, c, "EndWait");
smp_mb(); /* Ensure that CB invocation happens after GP end. */
}
/*
@@ -2390,7 +2224,7 @@ static int rcu_nocb_kthread(void *arg)
cl = atomic_long_xchg(&rdp->nocb_q_count_lazy, 0);
ACCESS_ONCE(rdp->nocb_p_count) += c;
ACCESS_ONCE(rdp->nocb_p_count_lazy) += cl;
wait_rcu_gp(rdp->rsp->call_remote);
rcu_nocb_wait_gp(rdp);
/* Each pass through the following loop invokes a callback. */
trace_rcu_batch_start(rdp->rsp->name, cl, c, -1);
@@ -2436,33 +2270,42 @@ static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
return;
for_each_cpu(cpu, rcu_nocb_mask) {
rdp = per_cpu_ptr(rsp->rda, cpu);
t = kthread_run(rcu_nocb_kthread, rdp, "rcuo%d", cpu);
t = kthread_run(rcu_nocb_kthread, rdp,
"rcuo%c/%d", rsp->abbr, cpu);
BUG_ON(IS_ERR(t));
ACCESS_ONCE(rdp->nocb_kthread) = t;
}
}
/* Prevent __call_rcu() from enqueuing callbacks on no-CBs CPUs */
static void init_nocb_callback_list(struct rcu_data *rdp)
static bool init_nocb_callback_list(struct rcu_data *rdp)
{
if (rcu_nocb_mask == NULL ||
!cpumask_test_cpu(rdp->cpu, rcu_nocb_mask))
return;
return false;
rdp->nxttail[RCU_NEXT_TAIL] = NULL;
}
/* Initialize the ->call_remote fields in the rcu_state structures. */
static void __init rcu_init_nocb(void)
{
#ifdef CONFIG_PREEMPT_RCU
rcu_preempt_state.call_remote = call_rcu_preempt_remote;
#endif /* #ifdef CONFIG_PREEMPT_RCU */
rcu_bh_state.call_remote = call_rcu_bh_remote;
rcu_sched_state.call_remote = call_rcu_sched_remote;
return true;
}
#else /* #ifdef CONFIG_RCU_NOCB_CPU */
/* Stub for the !CONFIG_RCU_NOCB_CPU case: always reports 0. */
static int rcu_nocb_needs_gp(struct rcu_state *rsp)
{
return 0;
}
/* Stub for the !CONFIG_RCU_NOCB_CPU case: does nothing. */
static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
{
}
/* Stub for the !CONFIG_RCU_NOCB_CPU case: @nrq is ignored. */
static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
{
}
/* Stub for the !CONFIG_RCU_NOCB_CPU case: nothing to initialize. */
static void rcu_init_one_nocb(struct rcu_node *rnp)
{
}
static bool is_nocb_cpu(int cpu)
{
return false;
@@ -2480,11 +2323,6 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
return 0;
}
/* Stub for the !CONFIG_RCU_NOCB_CPU case: every CPU is reported expendable. */
static bool nocb_cpu_expendable(int cpu)
{
return 1;
}
/* Stub for the !CONFIG_RCU_NOCB_CPU case: no per-CPU no-CBs state to set up. */
static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
{
}
@@ -2493,12 +2331,9 @@ static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
{
}
static void init_nocb_callback_list(struct rcu_data *rdp)
{
}
static void __init rcu_init_nocb(void)
static bool init_nocb_callback_list(struct rcu_data *rdp)
{
return false;
}
#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */

View File

@@ -46,8 +46,6 @@
#define RCU_TREE_NONCORE
#include "rcutree.h"
/*
 * Read the object @a through a pointer to long.  @a must be an lvalue,
 * because its address is taken; judging by the name, it is meant for
 * unsigned long objects.
 */
#define ulong2long(a) (*(long *)(&(a)))
static int r_open(struct inode *inode, struct file *file,
const struct seq_operations *op)
{