rcu: Remove expedited GP funnel-lock bypass

Commit cdacbe1f91264 ("rcu: Add fastpath bypassing funnel locking")
turns out to be a pessimization at high load because it forces a tree
full of tasks to wait for an expedited grace period that they probably
do not need.  This commit therefore removes this optimization.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Author: Paul E. McKenney
Date:   2016-01-30 17:23:19 -08:00
parent 4f41530245
commit e2fd9d3584
4 changed files with 8 additions and 29 deletions


@@ -3616,25 +3616,6 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
 	struct rcu_node *rnp0;
 	struct rcu_node *rnp1 = NULL;
 
-	/*
-	 * First try directly acquiring the root lock in order to reduce
-	 * latency in the common case where expedited grace periods are
-	 * rare. We check mutex_is_locked() to avoid pathological levels of
-	 * memory contention on ->exp_funnel_mutex in the heavy-load case.
-	 */
-	rnp0 = rcu_get_root(rsp);
-	if (!mutex_is_locked(&rnp0->exp_funnel_mutex)) {
-		if (mutex_trylock(&rnp0->exp_funnel_mutex)) {
-			trace_rcu_exp_funnel_lock(rsp->name, rnp0->level,
-						  rnp0->grplo, rnp0->grphi,
-						  TPS("acq"));
-			if (sync_exp_work_done(rsp, rnp0, NULL,
-					       &rdp->expedited_workdone0, s))
-				return NULL;
-			return rnp0;
-		}
-	}
-
 	/*
 	 * Each pass through the following loop works its way
 	 * up the rcu_node tree, returning if others have done the
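
The hunk above removes only the trylock bypass; the funnel-locking loop that follows it in exp_funnel_lock() is retained. As a rough illustration of that remaining pattern, here is a minimal stand-alone sketch built on pthreads rather than the kernel's primitives; struct node, done_seq, work_already_done(), and funnel_lock() are simplified, hypothetical stand-ins, not the kernel's implementation. Each task locks its leaf node, walks toward the root acquiring the parent before releasing the child, and bails out as soon as it sees that another task has already done the requested work:

	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	struct node {
		pthread_mutex_t lock;
		struct node *parent;		/* NULL at the root. */
	};

	static atomic_ulong done_seq;		/* Most recently completed request. */

	/* Has the work requested with sequence number s already been done? */
	static bool work_already_done(unsigned long s)
	{
		return atomic_load(&done_seq) >= s;
	}

	/*
	 * Walk from a leaf toward the root, holding at most two mutexes at
	 * a time.  Return the locked root if the caller must do the work
	 * itself, or NULL if some other task already did it.
	 */
	static struct node *funnel_lock(struct node *leaf, unsigned long s)
	{
		struct node *np = leaf;

		pthread_mutex_lock(&np->lock);
		for (;;) {
			if (work_already_done(s)) {
				pthread_mutex_unlock(&np->lock);
				return NULL;	/* Someone else beat us to it. */
			}
			if (!np->parent)
				return np;	/* Root held: caller does the work. */
			/* Acquire the parent before releasing the current node. */
			pthread_mutex_lock(&np->parent->lock);
			pthread_mutex_unlock(&np->lock);
			np = np->parent;
		}
	}

	int main(void)
	{
		struct node root = { .lock = PTHREAD_MUTEX_INITIALIZER, .parent = NULL };
		struct node leaf = { .lock = PTHREAD_MUTEX_INITIALIZER, .parent = &root };
		struct node *held = funnel_lock(&leaf, 1);

		if (held) {
			/* "Do the work", publish its completion, release the root. */
			atomic_store(&done_seq, 1UL);
			pthread_mutex_unlock(&held->lock);
		}
		printf("done_seq=%lu\n", (unsigned long)atomic_load(&done_seq));
		return 0;
	}

Because each level admits at most one winner per child subtree, contention on the root mutex stays bounded, which is why the removed bypass mainly paid off in the case the deleted comment describes, where expedited grace periods are rare.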


@@ -388,7 +388,6 @@ struct rcu_data {
 	struct rcu_head oom_head;
 #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
 	struct mutex exp_funnel_mutex;
-	atomic_long_t expedited_workdone0;	/* # done by others #0. */
 	atomic_long_t expedited_workdone1;	/* # done by others #1. */
 	atomic_long_t expedited_workdone2;	/* # done by others #2. */
 	atomic_long_t expedited_workdone3;	/* # done by others #3. */


@@ -185,17 +185,16 @@ static int show_rcuexp(struct seq_file *m, void *v)
 	int cpu;
 	struct rcu_state *rsp = (struct rcu_state *)m->private;
 	struct rcu_data *rdp;
-	unsigned long s0 = 0, s1 = 0, s2 = 0, s3 = 0;
+	unsigned long s1 = 0, s2 = 0, s3 = 0;
 
 	for_each_possible_cpu(cpu) {
 		rdp = per_cpu_ptr(rsp->rda, cpu);
-		s0 += atomic_long_read(&rdp->expedited_workdone0);
 		s1 += atomic_long_read(&rdp->expedited_workdone1);
 		s2 += atomic_long_read(&rdp->expedited_workdone2);
 		s3 += atomic_long_read(&rdp->expedited_workdone3);
 	}
-	seq_printf(m, "s=%lu wd0=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
-		   rsp->expedited_sequence, s0, s1, s2, s3,
+	seq_printf(m, "s=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
+		   rsp->expedited_sequence, s1, s2, s3,
 		   atomic_long_read(&rsp->expedited_normal),
 		   atomic_read(&rsp->expedited_need_qs),
 		   rsp->expedited_sequence / 2);
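
With the wd0= field and its counter gone, a line produced by the seq_printf() above takes the following form (the counter values here are purely illustrative):

	s=482 wd1=17 wd2=3 wd3=0 n=121 enq=0 sc=241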