Merge branches 'fixes.2015.07.22a' and 'initexp.2015.08.04a' into HEAD

fixes.2015.07.22a: Miscellaneous fixes.
initexp.2015.08.04a: Initialization and expedited updates.
	(Single branch due to conflicts.)
This commit is contained in:
Paul E. McKenney
2015-08-04 08:40:58 -07:00
20 changed files with 435 additions and 476 deletions

View File

@@ -70,6 +70,8 @@ MODULE_ALIAS("rcutree");
static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
static struct lock_class_key rcu_exp_class[RCU_NUM_LVLS];
static struct lock_class_key rcu_exp_sched_class[RCU_NUM_LVLS];
/*
* In order to export the rcu_state name to the tracing tools, it
@@ -124,13 +126,8 @@ module_param(rcu_fanout_exact, bool, 0444);
static int rcu_fanout_leaf = RCU_FANOUT_LEAF;
module_param(rcu_fanout_leaf, int, 0444);
int rcu_num_lvls __read_mostly = RCU_NUM_LVLS;
static int num_rcu_lvl[] = { /* Number of rcu_nodes at specified level. */
NUM_RCU_LVL_0,
NUM_RCU_LVL_1,
NUM_RCU_LVL_2,
NUM_RCU_LVL_3,
NUM_RCU_LVL_4,
};
/* Number of rcu_nodes at specified level. */
static int num_rcu_lvl[] = NUM_RCU_LVL_INIT;
int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */
/*
@@ -1178,9 +1175,11 @@ static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
j = jiffies;
gpa = READ_ONCE(rsp->gp_activity);
if (j - gpa > 2 * HZ)
pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x\n",
pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x s%d ->state=%#lx\n",
rsp->name, j - gpa,
rsp->gpnum, rsp->completed, rsp->gp_flags);
rsp->gpnum, rsp->completed,
rsp->gp_flags, rsp->gp_state,
rsp->gp_kthread ? rsp->gp_kthread->state : 0);
}
/*
@@ -1905,6 +1904,26 @@ static int rcu_gp_init(struct rcu_state *rsp)
return 1;
}
/*
* Helper function for wait_event_interruptible_timeout() wakeup
* at force-quiescent-state time.
*/
static bool rcu_gp_fqs_check_wake(struct rcu_state *rsp, int *gfp)
{
struct rcu_node *rnp = rcu_get_root(rsp);
/* Someone like call_rcu() requested a force-quiescent-state scan. */
*gfp = READ_ONCE(rsp->gp_flags);
if (*gfp & RCU_GP_FLAG_FQS)
return true;
/* The current grace period has completed. */
if (!READ_ONCE(rnp->qsmask) && !rcu_preempt_blocked_readers_cgp(rnp))
return true;
return false;
}
/*
* Do one round of quiescent-state forcing.
*/
@@ -2041,6 +2060,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
wait_event_interruptible(rsp->gp_wq,
READ_ONCE(rsp->gp_flags) &
RCU_GP_FLAG_INIT);
rsp->gp_state = RCU_GP_DONE_GPS;
/* Locking provides needed memory barrier. */
if (rcu_gp_init(rsp))
break;
@@ -2068,11 +2088,8 @@ static int __noreturn rcu_gp_kthread(void *arg)
TPS("fqswait"));
rsp->gp_state = RCU_GP_WAIT_FQS;
ret = wait_event_interruptible_timeout(rsp->gp_wq,
((gf = READ_ONCE(rsp->gp_flags)) &
RCU_GP_FLAG_FQS) ||
(!READ_ONCE(rnp->qsmask) &&
!rcu_preempt_blocked_readers_cgp(rnp)),
j);
rcu_gp_fqs_check_wake(rsp, &gf), j);
rsp->gp_state = RCU_GP_DOING_FQS;
/* Locking provides needed memory barriers. */
/* If grace period done, leave loop. */
if (!READ_ONCE(rnp->qsmask) &&
@@ -2110,7 +2127,9 @@ static int __noreturn rcu_gp_kthread(void *arg)
}
/* Handle grace-period end. */
rsp->gp_state = RCU_GP_CLEANUP;
rcu_gp_cleanup(rsp);
rsp->gp_state = RCU_GP_CLEANED;
}
}
@@ -3305,23 +3324,195 @@ void cond_synchronize_sched(unsigned long oldstate)
}
EXPORT_SYMBOL_GPL(cond_synchronize_sched);
/* Adjust sequence number for start of update-side operation. */
static void rcu_seq_start(unsigned long *sp)
{
WRITE_ONCE(*sp, *sp + 1);
smp_mb(); /* Ensure update-side operation after counter increment. */
WARN_ON_ONCE(!(*sp & 0x1));
}
/* Adjust sequence number for end of update-side operation. */
static void rcu_seq_end(unsigned long *sp)
{
smp_mb(); /* Ensure update-side operation before counter increment. */
WRITE_ONCE(*sp, *sp + 1);
WARN_ON_ONCE(*sp & 0x1);
}
/* Take a snapshot of the update side's sequence number. */
static unsigned long rcu_seq_snap(unsigned long *sp)
{
unsigned long s;
smp_mb(); /* Caller's modifications seen first by other CPUs. */
s = (READ_ONCE(*sp) + 3) & ~0x1;
smp_mb(); /* Above access must not bleed into critical section. */
return s;
}
/*
* Given a snapshot from rcu_seq_snap(), determine whether or not a
* full update-side operation has occurred.
*/
static bool rcu_seq_done(unsigned long *sp, unsigned long s)
{
return ULONG_CMP_GE(READ_ONCE(*sp), s);
}
/* Wrapper functions for expedited grace periods. */
static void rcu_exp_gp_seq_start(struct rcu_state *rsp)
{
rcu_seq_start(&rsp->expedited_sequence);
}
static void rcu_exp_gp_seq_end(struct rcu_state *rsp)
{
rcu_seq_end(&rsp->expedited_sequence);
smp_mb(); /* Ensure that consecutive grace periods serialize. */
}
static unsigned long rcu_exp_gp_seq_snap(struct rcu_state *rsp)
{
return rcu_seq_snap(&rsp->expedited_sequence);
}
static bool rcu_exp_gp_seq_done(struct rcu_state *rsp, unsigned long s)
{
return rcu_seq_done(&rsp->expedited_sequence, s);
}
/* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */
static bool sync_exp_work_done(struct rcu_state *rsp, struct rcu_node *rnp,
struct rcu_data *rdp,
atomic_long_t *stat, unsigned long s)
{
if (rcu_exp_gp_seq_done(rsp, s)) {
if (rnp)
mutex_unlock(&rnp->exp_funnel_mutex);
else if (rdp)
mutex_unlock(&rdp->exp_funnel_mutex);
/* Ensure test happens before caller kfree(). */
smp_mb__before_atomic(); /* ^^^ */
atomic_long_inc(stat);
return true;
}
return false;
}
/*
* Funnel-lock acquisition for expedited grace periods. Returns a
* pointer to the root rcu_node structure, or NULL if some other
* task did the expedited grace period for us.
*/
static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
{
struct rcu_data *rdp;
struct rcu_node *rnp0;
struct rcu_node *rnp1 = NULL;
/*
* First try directly acquiring the root lock in order to reduce
* latency in the common case where expedited grace periods are
* rare. We check mutex_is_locked() to avoid pathological levels of
* memory contention on ->exp_funnel_mutex in the heavy-load case.
*/
rnp0 = rcu_get_root(rsp);
if (!mutex_is_locked(&rnp0->exp_funnel_mutex)) {
if (mutex_trylock(&rnp0->exp_funnel_mutex)) {
if (sync_exp_work_done(rsp, rnp0, NULL,
&rsp->expedited_workdone0, s))
return NULL;
return rnp0;
}
}
/*
* Each pass through the following loop works its way
* up the rcu_node tree, returning if others have done the
* work or otherwise falls through holding the root rnp's
* ->exp_funnel_mutex. The mapping from CPU to rcu_node structure
* can be inexact, as it is just promoting locality and is not
* strictly needed for correctness.
*/
rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
if (sync_exp_work_done(rsp, NULL, NULL, &rsp->expedited_workdone1, s))
return NULL;
mutex_lock(&rdp->exp_funnel_mutex);
rnp0 = rdp->mynode;
for (; rnp0 != NULL; rnp0 = rnp0->parent) {
if (sync_exp_work_done(rsp, rnp1, rdp,
&rsp->expedited_workdone2, s))
return NULL;
mutex_lock(&rnp0->exp_funnel_mutex);
if (rnp1)
mutex_unlock(&rnp1->exp_funnel_mutex);
else
mutex_unlock(&rdp->exp_funnel_mutex);
rnp1 = rnp0;
}
if (sync_exp_work_done(rsp, rnp1, rdp,
&rsp->expedited_workdone3, s))
return NULL;
return rnp1;
}
/* Invoked on each online non-idle CPU for expedited quiescent state. */
static int synchronize_sched_expedited_cpu_stop(void *data)
{
/*
* There must be a full memory barrier on each affected CPU
* between the time that try_stop_cpus() is called and the
* time that it returns.
*
* In the current initial implementation of cpu_stop, the
* above condition is already met when the control reaches
* this point and the following smp_mb() is not strictly
* necessary. Do smp_mb() anyway for documentation and
* robustness against future implementation changes.
*/
smp_mb(); /* See above comment block. */
struct rcu_data *rdp = data;
struct rcu_state *rsp = rdp->rsp;
/* We are here: If we are last, do the wakeup. */
rdp->exp_done = true;
if (atomic_dec_and_test(&rsp->expedited_need_qs))
wake_up(&rsp->expedited_wq);
return 0;
}
static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
{
int cpu;
unsigned long jiffies_stall;
unsigned long jiffies_start;
struct rcu_data *rdp;
int ret;
jiffies_stall = rcu_jiffies_till_stall_check();
jiffies_start = jiffies;
for (;;) {
ret = wait_event_interruptible_timeout(
rsp->expedited_wq,
!atomic_read(&rsp->expedited_need_qs),
jiffies_stall);
if (ret > 0)
return;
if (ret < 0) {
/* Hit a signal, disable CPU stall warnings. */
wait_event(rsp->expedited_wq,
!atomic_read(&rsp->expedited_need_qs));
return;
}
pr_err("INFO: %s detected expedited stalls on CPUs: {",
rsp->name);
for_each_online_cpu(cpu) {
rdp = per_cpu_ptr(rsp->rda, cpu);
if (rdp->exp_done)
continue;
pr_cont(" %d", cpu);
}
pr_cont(" } %lu jiffies s: %lu\n",
jiffies - jiffies_start, rsp->expedited_sequence);
for_each_online_cpu(cpu) {
rdp = per_cpu_ptr(rsp->rda, cpu);
if (rdp->exp_done)
continue;
dump_cpu_task(cpu);
}
jiffies_stall = 3 * rcu_jiffies_till_stall_check() + 3;
}
}
/**
* synchronize_sched_expedited - Brute-force RCU-sched grace period
*
@@ -3333,58 +3524,21 @@ static int synchronize_sched_expedited_cpu_stop(void *data)
* restructure your code to batch your updates, and then use a single
* synchronize_sched() instead.
*
* This implementation can be thought of as an application of ticket
* locking to RCU, with sync_sched_expedited_started and
* sync_sched_expedited_done taking on the roles of the halves
* of the ticket-lock word. Each task atomically increments
* sync_sched_expedited_started upon entry, snapshotting the old value,
* then attempts to stop all the CPUs. If this succeeds, then each
* CPU will have executed a context switch, resulting in an RCU-sched
* grace period. We are then done, so we use atomic_cmpxchg() to
* update sync_sched_expedited_done to match our snapshot -- but
* only if someone else has not already advanced past our snapshot.
*
* On the other hand, if try_stop_cpus() fails, we check the value
* of sync_sched_expedited_done. If it has advanced past our
* initial snapshot, then someone else must have forced a grace period
* some time after we took our snapshot. In this case, our work is
* done for us, and we can simply return. Otherwise, we try again,
* but keep our initial snapshot for purposes of checking for someone
* doing our work for us.
*
* If we fail too many times in a row, we fall back to synchronize_sched().
* This implementation can be thought of as an application of sequence
* locking to expedited grace periods, but using the sequence counter to
* determine when someone else has already done the work instead of for
* retrying readers.
*/
void synchronize_sched_expedited(void)
{
cpumask_var_t cm;
bool cma = false;
int cpu;
long firstsnap, s, snap;
int trycount = 0;
unsigned long s;
struct rcu_node *rnp;
struct rcu_state *rsp = &rcu_sched_state;
/*
* If we are in danger of counter wrap, just do synchronize_sched().
* By allowing sync_sched_expedited_started to advance no more than
* ULONG_MAX/8 ahead of sync_sched_expedited_done, we are ensuring
* that more than 3.5 billion CPUs would be required to force a
* counter wrap on a 32-bit system. Quite a few more CPUs would of
* course be required on a 64-bit system.
*/
if (ULONG_CMP_GE((ulong)atomic_long_read(&rsp->expedited_start),
(ulong)atomic_long_read(&rsp->expedited_done) +
ULONG_MAX / 8)) {
wait_rcu_gp(call_rcu_sched);
atomic_long_inc(&rsp->expedited_wrap);
return;
}
/* Take a snapshot of the sequence number. */
s = rcu_exp_gp_seq_snap(rsp);
/*
* Take a ticket. Note that atomic_inc_return() implies a
* full memory barrier.
*/
snap = atomic_long_inc_return(&rsp->expedited_start);
firstsnap = snap;
if (!try_get_online_cpus()) {
/* CPU hotplug operation in flight, fall back to normal GP. */
wait_rcu_gp(call_rcu_sched);
@@ -3393,100 +3547,38 @@ void synchronize_sched_expedited(void)
}
WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id()));
/* Offline CPUs, idle CPUs, and any CPU we run on are quiescent. */
cma = zalloc_cpumask_var(&cm, GFP_KERNEL);
if (cma) {
cpumask_copy(cm, cpu_online_mask);
cpumask_clear_cpu(raw_smp_processor_id(), cm);
for_each_cpu(cpu, cm) {
struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
if (!(atomic_add_return(0, &rdtp->dynticks) & 0x1))
cpumask_clear_cpu(cpu, cm);
}
if (cpumask_weight(cm) == 0)
goto all_cpus_idle;
}
/*
* Each pass through the following loop attempts to force a
* context switch on each CPU.
*/
while (try_stop_cpus(cma ? cm : cpu_online_mask,
synchronize_sched_expedited_cpu_stop,
NULL) == -EAGAIN) {
rnp = exp_funnel_lock(rsp, s);
if (rnp == NULL) {
put_online_cpus();
atomic_long_inc(&rsp->expedited_tryfail);
/* Check to see if someone else did our work for us. */
s = atomic_long_read(&rsp->expedited_done);
if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) {
/* ensure test happens before caller kfree */
smp_mb__before_atomic(); /* ^^^ */
atomic_long_inc(&rsp->expedited_workdone1);
free_cpumask_var(cm);
return;
}
/* No joy, try again later. Or just synchronize_sched(). */
if (trycount++ < 10) {
udelay(trycount * num_online_cpus());
} else {
wait_rcu_gp(call_rcu_sched);
atomic_long_inc(&rsp->expedited_normal);
free_cpumask_var(cm);
return;
}
/* Recheck to see if someone else did our work for us. */
s = atomic_long_read(&rsp->expedited_done);
if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) {
/* ensure test happens before caller kfree */
smp_mb__before_atomic(); /* ^^^ */
atomic_long_inc(&rsp->expedited_workdone2);
free_cpumask_var(cm);
return;
}
/*
* Refetching sync_sched_expedited_started allows later
* callers to piggyback on our grace period. We retry
* after they started, so our grace period works for them,
* and they started after our first try, so their grace
* period works for us.
*/
if (!try_get_online_cpus()) {
/* CPU hotplug operation in flight, use normal GP. */
wait_rcu_gp(call_rcu_sched);
atomic_long_inc(&rsp->expedited_normal);
free_cpumask_var(cm);
return;
}
snap = atomic_long_read(&rsp->expedited_start);
smp_mb(); /* ensure read is before try_stop_cpus(). */
return; /* Someone else did our work for us. */
}
atomic_long_inc(&rsp->expedited_stoppedcpus);
all_cpus_idle:
free_cpumask_var(cm);
rcu_exp_gp_seq_start(rsp);
/*
* Everyone up to our most recent fetch is covered by our grace
* period. Update the counter, but only if our work is still
* relevant -- which it won't be if someone who started later
* than we did already did their update.
*/
do {
atomic_long_inc(&rsp->expedited_done_tries);
s = atomic_long_read(&rsp->expedited_done);
if (ULONG_CMP_GE((ulong)s, (ulong)snap)) {
/* ensure test happens before caller kfree */
smp_mb__before_atomic(); /* ^^^ */
atomic_long_inc(&rsp->expedited_done_lost);
break;
}
} while (atomic_long_cmpxchg(&rsp->expedited_done, s, snap) != s);
atomic_long_inc(&rsp->expedited_done_exit);
/* Stop each CPU that is online, non-idle, and not us. */
init_waitqueue_head(&rsp->expedited_wq);
atomic_set(&rsp->expedited_need_qs, 1); /* Extra count avoids race. */
for_each_online_cpu(cpu) {
struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
rdp->exp_done = false;
/* Skip our CPU and any idle CPUs. */
if (raw_smp_processor_id() == cpu ||
!(atomic_add_return(0, &rdtp->dynticks) & 0x1))
continue;
atomic_inc(&rsp->expedited_need_qs);
stop_one_cpu_nowait(cpu, synchronize_sched_expedited_cpu_stop,
rdp, &rdp->exp_stop_work);
}
/* Remove extra count and, if necessary, wait for CPUs to stop. */
if (!atomic_dec_and_test(&rsp->expedited_need_qs))
synchronize_sched_expedited_wait(rsp);
rcu_exp_gp_seq_end(rsp);
mutex_unlock(&rnp->exp_funnel_mutex);
put_online_cpus();
}
@@ -3623,10 +3715,10 @@ static void rcu_barrier_callback(struct rcu_head *rhp)
struct rcu_state *rsp = rdp->rsp;
if (atomic_dec_and_test(&rsp->barrier_cpu_count)) {
_rcu_barrier_trace(rsp, "LastCB", -1, rsp->n_barrier_done);
_rcu_barrier_trace(rsp, "LastCB", -1, rsp->barrier_sequence);
complete(&rsp->barrier_completion);
} else {
_rcu_barrier_trace(rsp, "CB", -1, rsp->n_barrier_done);
_rcu_barrier_trace(rsp, "CB", -1, rsp->barrier_sequence);
}
}
@@ -3638,7 +3730,7 @@ static void rcu_barrier_func(void *type)
struct rcu_state *rsp = type;
struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
_rcu_barrier_trace(rsp, "IRQ", -1, rsp->n_barrier_done);
_rcu_barrier_trace(rsp, "IRQ", -1, rsp->barrier_sequence);
atomic_inc(&rsp->barrier_cpu_count);
rsp->call(&rdp->barrier_head, rcu_barrier_callback);
}
@@ -3651,55 +3743,24 @@ static void _rcu_barrier(struct rcu_state *rsp)
{
int cpu;
struct rcu_data *rdp;
unsigned long snap = READ_ONCE(rsp->n_barrier_done);
unsigned long snap_done;
unsigned long s = rcu_seq_snap(&rsp->barrier_sequence);
_rcu_barrier_trace(rsp, "Begin", -1, snap);
_rcu_barrier_trace(rsp, "Begin", -1, s);
/* Take mutex to serialize concurrent rcu_barrier() requests. */
mutex_lock(&rsp->barrier_mutex);
/*
* Ensure that all prior references, including to ->n_barrier_done,
* are ordered before the _rcu_barrier() machinery.
*/
smp_mb(); /* See above block comment. */
/*
* Recheck ->n_barrier_done to see if others did our work for us.
* This means checking ->n_barrier_done for an even-to-odd-to-even
* transition. The "if" expression below therefore rounds the old
* value up to the next even number and adds two before comparing.
*/
snap_done = rsp->n_barrier_done;
_rcu_barrier_trace(rsp, "Check", -1, snap_done);
/*
* If the value in snap is odd, we needed to wait for the current
* rcu_barrier() to complete, then wait for the next one, in other
* words, we need the value of snap_done to be three larger than
* the value of snap. On the other hand, if the value in snap is
* even, we only had to wait for the next rcu_barrier() to complete,
* in other words, we need the value of snap_done to be only two
* greater than the value of snap. The "(snap + 3) & ~0x1" computes
* this for us (thank you, Linus!).
*/
if (ULONG_CMP_GE(snap_done, (snap + 3) & ~0x1)) {
_rcu_barrier_trace(rsp, "EarlyExit", -1, snap_done);
/* Did someone else do our work for us? */
if (rcu_seq_done(&rsp->barrier_sequence, s)) {
_rcu_barrier_trace(rsp, "EarlyExit", -1, rsp->barrier_sequence);
smp_mb(); /* caller's subsequent code after above check. */
mutex_unlock(&rsp->barrier_mutex);
return;
}
/*
* Increment ->n_barrier_done to avoid duplicate work. Use
* WRITE_ONCE() to prevent the compiler from speculating
* the increment to precede the early-exit check.
*/
WRITE_ONCE(rsp->n_barrier_done, rsp->n_barrier_done + 1);
WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 1);
_rcu_barrier_trace(rsp, "Inc1", -1, rsp->n_barrier_done);
smp_mb(); /* Order ->n_barrier_done increment with below mechanism. */
/* Mark the start of the barrier operation. */
rcu_seq_start(&rsp->barrier_sequence);
_rcu_barrier_trace(rsp, "Inc1", -1, rsp->barrier_sequence);
/*
* Initialize the count to one rather than to zero in order to
@@ -3723,10 +3784,10 @@ static void _rcu_barrier(struct rcu_state *rsp)
if (rcu_is_nocb_cpu(cpu)) {
if (!rcu_nocb_cpu_needs_barrier(rsp, cpu)) {
_rcu_barrier_trace(rsp, "OfflineNoCB", cpu,
rsp->n_barrier_done);
rsp->barrier_sequence);
} else {
_rcu_barrier_trace(rsp, "OnlineNoCB", cpu,
rsp->n_barrier_done);
rsp->barrier_sequence);
smp_mb__before_atomic();
atomic_inc(&rsp->barrier_cpu_count);
__call_rcu(&rdp->barrier_head,
@@ -3734,11 +3795,11 @@ static void _rcu_barrier(struct rcu_state *rsp)
}
} else if (READ_ONCE(rdp->qlen)) {
_rcu_barrier_trace(rsp, "OnlineQ", cpu,
rsp->n_barrier_done);
rsp->barrier_sequence);
smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);
} else {
_rcu_barrier_trace(rsp, "OnlineNQ", cpu,
rsp->n_barrier_done);
rsp->barrier_sequence);
}
}
put_online_cpus();
@@ -3750,16 +3811,13 @@ static void _rcu_barrier(struct rcu_state *rsp)
if (atomic_dec_and_test(&rsp->barrier_cpu_count))
complete(&rsp->barrier_completion);
/* Increment ->n_barrier_done to prevent duplicate work. */
smp_mb(); /* Keep increment after above mechanism. */
WRITE_ONCE(rsp->n_barrier_done, rsp->n_barrier_done + 1);
WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 0);
_rcu_barrier_trace(rsp, "Inc2", -1, rsp->n_barrier_done);
smp_mb(); /* Keep increment before caller's subsequent code. */
/* Wait for all rcu_barrier_callback() callbacks to be invoked. */
wait_for_completion(&rsp->barrier_completion);
/* Mark the end of the barrier operation. */
_rcu_barrier_trace(rsp, "Inc2", -1, rsp->barrier_sequence);
rcu_seq_end(&rsp->barrier_sequence);
/* Other rcu_barrier() invocations can now safely proceed. */
mutex_unlock(&rsp->barrier_mutex);
}
@@ -3822,6 +3880,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
rdp->cpu = cpu;
rdp->rsp = rsp;
mutex_init(&rdp->exp_funnel_mutex);
rcu_boot_init_nocb_percpu_data(rdp);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
}
@@ -4013,22 +4072,22 @@ void rcu_scheduler_starting(void)
* Compute the per-level fanout, either using the exact fanout specified
* or balancing the tree, depending on the rcu_fanout_exact boot parameter.
*/
static void __init rcu_init_levelspread(struct rcu_state *rsp)
static void __init rcu_init_levelspread(int *levelspread, const int *levelcnt)
{
int i;
if (rcu_fanout_exact) {
rsp->levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf;
levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf;
for (i = rcu_num_lvls - 2; i >= 0; i--)
rsp->levelspread[i] = RCU_FANOUT;
levelspread[i] = RCU_FANOUT;
} else {
int ccur;
int cprv;
cprv = nr_cpu_ids;
for (i = rcu_num_lvls - 1; i >= 0; i--) {
ccur = rsp->levelcnt[i];
rsp->levelspread[i] = (cprv + ccur - 1) / ccur;
ccur = levelcnt[i];
levelspread[i] = (cprv + ccur - 1) / ccur;
cprv = ccur;
}
}
@@ -4040,23 +4099,20 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
static void __init rcu_init_one(struct rcu_state *rsp,
struct rcu_data __percpu *rda)
{
static const char * const buf[] = {
"rcu_node_0",
"rcu_node_1",
"rcu_node_2",
"rcu_node_3" }; /* Match MAX_RCU_LVLS */
static const char * const fqs[] = {
"rcu_node_fqs_0",
"rcu_node_fqs_1",
"rcu_node_fqs_2",
"rcu_node_fqs_3" }; /* Match MAX_RCU_LVLS */
static const char * const buf[] = RCU_NODE_NAME_INIT;
static const char * const fqs[] = RCU_FQS_NAME_INIT;
static const char * const exp[] = RCU_EXP_NAME_INIT;
static const char * const exp_sched[] = RCU_EXP_SCHED_NAME_INIT;
static u8 fl_mask = 0x1;
int levelcnt[RCU_NUM_LVLS]; /* # nodes in each level. */
int levelspread[RCU_NUM_LVLS]; /* kids/node in each level. */
int cpustride = 1;
int i;
int j;
struct rcu_node *rnp;
BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf)); /* Fix buf[] init! */
BUILD_BUG_ON(RCU_NUM_LVLS > ARRAY_SIZE(buf)); /* Fix buf[] init! */
/* Silence gcc 4.8 false positive about array index out of range. */
if (rcu_num_lvls <= 0 || rcu_num_lvls > RCU_NUM_LVLS)
@@ -4065,19 +4121,19 @@ static void __init rcu_init_one(struct rcu_state *rsp,
/* Initialize the level-tracking arrays. */
for (i = 0; i < rcu_num_lvls; i++)
rsp->levelcnt[i] = num_rcu_lvl[i];
levelcnt[i] = num_rcu_lvl[i];
for (i = 1; i < rcu_num_lvls; i++)
rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1];
rcu_init_levelspread(rsp);
rsp->level[i] = rsp->level[i - 1] + levelcnt[i - 1];
rcu_init_levelspread(levelspread, levelcnt);
rsp->flavor_mask = fl_mask;
fl_mask <<= 1;
/* Initialize the elements themselves, starting from the leaves. */
for (i = rcu_num_lvls - 1; i >= 0; i--) {
cpustride *= rsp->levelspread[i];
cpustride *= levelspread[i];
rnp = rsp->level[i];
for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
for (j = 0; j < levelcnt[i]; j++, rnp++) {
raw_spin_lock_init(&rnp->lock);
lockdep_set_class_and_name(&rnp->lock,
&rcu_node_class[i], buf[i]);
@@ -4097,14 +4153,23 @@ static void __init rcu_init_one(struct rcu_state *rsp,
rnp->grpmask = 0;
rnp->parent = NULL;
} else {
rnp->grpnum = j % rsp->levelspread[i - 1];
rnp->grpnum = j % levelspread[i - 1];
rnp->grpmask = 1UL << rnp->grpnum;
rnp->parent = rsp->level[i - 1] +
j / rsp->levelspread[i - 1];
j / levelspread[i - 1];
}
rnp->level = i;
INIT_LIST_HEAD(&rnp->blkd_tasks);
rcu_init_one_nocb(rnp);
mutex_init(&rnp->exp_funnel_mutex);
if (rsp == &rcu_sched_state)
lockdep_set_class_and_name(
&rnp->exp_funnel_mutex,
&rcu_exp_sched_class[i], exp_sched[i]);
else
lockdep_set_class_and_name(
&rnp->exp_funnel_mutex,
&rcu_exp_class[i], exp[i]);
}
}
@@ -4128,9 +4193,7 @@ static void __init rcu_init_geometry(void)
{
ulong d;
int i;
int j;
int n = nr_cpu_ids;
int rcu_capacity[MAX_RCU_LVLS + 1];
int rcu_capacity[RCU_NUM_LVLS];
/*
* Initialize any unspecified boot parameters.
@@ -4152,48 +4215,50 @@ static void __init rcu_init_geometry(void)
pr_info("RCU: Adjusting geometry for rcu_fanout_leaf=%d, nr_cpu_ids=%d\n",
rcu_fanout_leaf, nr_cpu_ids);
/*
* Compute number of nodes that can be handled an rcu_node tree
* with the given number of levels. Setting rcu_capacity[0] makes
* some of the arithmetic easier.
*/
rcu_capacity[0] = 1;
rcu_capacity[1] = rcu_fanout_leaf;
for (i = 2; i <= MAX_RCU_LVLS; i++)
rcu_capacity[i] = rcu_capacity[i - 1] * RCU_FANOUT;
/*
* The boot-time rcu_fanout_leaf parameter is only permitted
* to increase the leaf-level fanout, not decrease it. Of course,
* the leaf-level fanout cannot exceed the number of bits in
* the rcu_node masks. Finally, the tree must be able to accommodate
* the configured number of CPUs. Complain and fall back to the
* compile-time values if these limits are exceeded.
* the rcu_node masks. Complain and fall back to the compile-
* time values if these limits are exceeded.
*/
if (rcu_fanout_leaf < RCU_FANOUT_LEAF ||
rcu_fanout_leaf > sizeof(unsigned long) * 8 ||
n > rcu_capacity[MAX_RCU_LVLS]) {
rcu_fanout_leaf > sizeof(unsigned long) * 8) {
rcu_fanout_leaf = RCU_FANOUT_LEAF;
WARN_ON(1);
return;
}
/*
* Compute number of nodes that can be handled an rcu_node tree
* with the given number of levels.
*/
rcu_capacity[0] = rcu_fanout_leaf;
for (i = 1; i < RCU_NUM_LVLS; i++)
rcu_capacity[i] = rcu_capacity[i - 1] * RCU_FANOUT;
/*
* The tree must be able to accommodate the configured number of CPUs.
* If this limit is exceeded than we have a serious problem elsewhere.
*/
if (nr_cpu_ids > rcu_capacity[RCU_NUM_LVLS - 1])
panic("rcu_init_geometry: rcu_capacity[] is too small");
/* Calculate the number of levels in the tree. */
for (i = 0; nr_cpu_ids > rcu_capacity[i]; i++) {
}
rcu_num_lvls = i + 1;
/* Calculate the number of rcu_nodes at each level of the tree. */
for (i = 1; i <= MAX_RCU_LVLS; i++)
if (n <= rcu_capacity[i]) {
for (j = 0; j <= i; j++)
num_rcu_lvl[j] =
DIV_ROUND_UP(n, rcu_capacity[i - j]);
rcu_num_lvls = i;
for (j = i + 1; j <= MAX_RCU_LVLS; j++)
num_rcu_lvl[j] = 0;
break;
}
for (i = 0; i < rcu_num_lvls; i++) {
int cap = rcu_capacity[(rcu_num_lvls - 1) - i];
num_rcu_lvl[i] = DIV_ROUND_UP(nr_cpu_ids, cap);
}
/* Calculate the total number of rcu_node structures. */
rcu_num_nodes = 0;
for (i = 0; i <= MAX_RCU_LVLS; i++)
for (i = 0; i < rcu_num_lvls; i++)
rcu_num_nodes += num_rcu_lvl[i];
rcu_num_nodes -= n;
}
/*