Merge branch 'rcu/next' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-2.6-rcu into core/rcu

This commit is contained in:
Ingo Molnar
2010-12-23 12:57:04 +01:00
17 changed files with 1206 additions and 284 deletions

View File

@@ -36,31 +36,16 @@
#include <linux/time.h>
#include <linux/cpu.h>
/* Global control variables for rcupdate callback mechanism. */
struct rcu_ctrlblk {
struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */
struct rcu_head **donetail; /* ->next pointer of last "done" CB. */
struct rcu_head **curtail; /* ->next pointer of last CB. */
};
/* Definition for rcupdate control block. */
static struct rcu_ctrlblk rcu_sched_ctrlblk = {
.donetail = &rcu_sched_ctrlblk.rcucblist,
.curtail = &rcu_sched_ctrlblk.rcucblist,
};
static struct rcu_ctrlblk rcu_bh_ctrlblk = {
.donetail = &rcu_bh_ctrlblk.rcucblist,
.curtail = &rcu_bh_ctrlblk.rcucblist,
};
#ifdef CONFIG_DEBUG_LOCK_ALLOC
int rcu_scheduler_active __read_mostly;
EXPORT_SYMBOL_GPL(rcu_scheduler_active);
#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
/* Controls for rcu_kthread() kthread, replacing RCU_SOFTIRQ used previously. */
static struct task_struct *rcu_kthread_task;
static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq);
static unsigned long have_rcu_kthread_work;
static void invoke_rcu_kthread(void);
/* Forward declarations for rcutiny_plugin.h. */
static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp);
struct rcu_ctrlblk;
static void rcu_process_callbacks(struct rcu_ctrlblk *rcp);
static int rcu_kthread(void *arg);
static void __call_rcu(struct rcu_head *head,
void (*func)(struct rcu_head *rcu),
struct rcu_ctrlblk *rcp);
@@ -123,7 +108,7 @@ void rcu_sched_qs(int cpu)
{
if (rcu_qsctr_help(&rcu_sched_ctrlblk) +
rcu_qsctr_help(&rcu_bh_ctrlblk))
raise_softirq(RCU_SOFTIRQ);
invoke_rcu_kthread();
}
/*
@@ -132,7 +117,7 @@ void rcu_sched_qs(int cpu)
void rcu_bh_qs(int cpu)
{
if (rcu_qsctr_help(&rcu_bh_ctrlblk))
raise_softirq(RCU_SOFTIRQ);
invoke_rcu_kthread();
}
/*
@@ -152,13 +137,14 @@ void rcu_check_callbacks(int cpu, int user)
}
/*
* Helper function for rcu_process_callbacks() that operates on the
* specified rcu_ctrlkblk structure.
* Invoke the RCU callbacks on the specified rcu_ctrlblk structure
* whose grace period has elapsed.
*/
static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
static void rcu_process_callbacks(struct rcu_ctrlblk *rcp)
{
struct rcu_head *next, *list;
unsigned long flags;
RCU_TRACE(int cb_count = 0);
/* If no RCU callbacks ready to invoke, just return. */
if (&rcp->rcucblist == rcp->donetail)
@@ -180,19 +166,58 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
next = list->next;
prefetch(next);
debug_rcu_head_unqueue(list);
local_bh_disable();
list->func(list);
local_bh_enable();
list = next;
RCU_TRACE(cb_count++);
}
RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count));
}
/*
* Invoke any callbacks whose grace period has completed.
* This kthread invokes RCU callbacks whose grace periods have
* elapsed. It is awakened as needed, and takes the place of the
* RCU_SOFTIRQ that was used previously for this purpose.
* This is a kthread, but it is never stopped, at least not until
* the system goes down.
*/
static void rcu_process_callbacks(struct softirq_action *unused)
static int rcu_kthread(void *arg)
{
__rcu_process_callbacks(&rcu_sched_ctrlblk);
__rcu_process_callbacks(&rcu_bh_ctrlblk);
rcu_preempt_process_callbacks();
unsigned long work;
unsigned long morework;
unsigned long flags;
for (;;) {
wait_event(rcu_kthread_wq, have_rcu_kthread_work != 0);
morework = rcu_boost();
local_irq_save(flags);
work = have_rcu_kthread_work;
have_rcu_kthread_work = morework;
local_irq_restore(flags);
if (work) {
rcu_process_callbacks(&rcu_sched_ctrlblk);
rcu_process_callbacks(&rcu_bh_ctrlblk);
rcu_preempt_process_callbacks();
}
schedule_timeout_interruptible(1); /* Leave CPU for others. */
}
return 0; /* Not reached, but needed to shut gcc up. */
}
/*
* Wake up rcu_kthread() to process callbacks now eligible for invocation
* or to boost readers.
*
* Sets have_rcu_kthread_work, which is the condition rcu_kthread() waits
* for on rcu_kthread_wq, and then wakes that wait queue.
*/
static void invoke_rcu_kthread(void)
{
unsigned long flags;
/* Interrupts stay disabled across both the flag update and the wakeup. */
local_irq_save(flags);
/* Set the flag before the wakeup so the waiter's condition is already true. */
have_rcu_kthread_work = 1;
wake_up(&rcu_kthread_wq);
local_irq_restore(flags);
}
/*
@@ -230,6 +255,7 @@ static void __call_rcu(struct rcu_head *head,
local_irq_save(flags);
*rcp->curtail = head;
rcp->curtail = &head->next;
RCU_TRACE(rcp->qlen++);
local_irq_restore(flags);
}
@@ -282,7 +308,16 @@ void rcu_barrier_sched(void)
}
EXPORT_SYMBOL_GPL(rcu_barrier_sched);
void __init rcu_init(void)
/*
* Spawn the kthread that invokes RCU callbacks.
*/
static int __init rcu_spawn_kthreads(void)
{
open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
struct sched_param sp;
rcu_kthread_task = kthread_run(rcu_kthread, NULL, "rcu_kthread");
sp.sched_priority = RCU_BOOST_PRIO;
sched_setscheduler_nocheck(rcu_kthread_task, SCHED_FIFO, &sp);
return 0;
}
early_initcall(rcu_spawn_kthreads);

View File

@@ -22,6 +22,40 @@
* Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
*/
#include <linux/kthread.h>
#include <linux/debugfs.h>
#include <linux/errno.h>
#include <linux/seq_file.h>
#ifdef CONFIG_RCU_TRACE
#define RCU_TRACE(stmt) stmt
#else /* #ifdef CONFIG_RCU_TRACE */
#define RCU_TRACE(stmt)
#endif /* #else #ifdef CONFIG_RCU_TRACE */
/*
* Global control variables for rcupdate callback mechanism.
*
* The ->qlen field is wrapped in RCU_TRACE(), so it exists only when
* CONFIG_RCU_TRACE is defined; every access to it must therefore also
* be wrapped in RCU_TRACE() or sit under #ifdef CONFIG_RCU_TRACE.
*/
struct rcu_ctrlblk {
struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */
struct rcu_head **donetail; /* ->next pointer of last "done" CB. */
struct rcu_head **curtail; /* ->next pointer of last CB. */
RCU_TRACE(long qlen); /* Number of pending CBs. */
};
/* Definition for rcupdate control block. */
static struct rcu_ctrlblk rcu_sched_ctrlblk = {
.donetail = &rcu_sched_ctrlblk.rcucblist,
.curtail = &rcu_sched_ctrlblk.rcucblist,
};
static struct rcu_ctrlblk rcu_bh_ctrlblk = {
.donetail = &rcu_bh_ctrlblk.rcucblist,
.curtail = &rcu_bh_ctrlblk.rcucblist,
};
#ifdef CONFIG_DEBUG_LOCK_ALLOC
int rcu_scheduler_active __read_mostly;
EXPORT_SYMBOL_GPL(rcu_scheduler_active);
#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
#ifdef CONFIG_TINY_PREEMPT_RCU
#include <linux/delay.h>
@@ -46,17 +80,45 @@ struct rcu_preempt_ctrlblk {
struct list_head *gp_tasks;
/* Pointer to the first task blocking the */
/* current grace period, or NULL if there */
/* is not such task. */
/* is no such task. */
struct list_head *exp_tasks;
/* Pointer to first task blocking the */
/* current expedited grace period, or NULL */
/* if there is no such task. If there */
/* is no current expedited grace period, */
/* then there cannot be any such task. */
#ifdef CONFIG_RCU_BOOST
struct list_head *boost_tasks;
/* Pointer to first task that needs to be */
/* priority-boosted, or NULL if no priority */
/* boosting is needed. If there is no */
/* current or expedited grace period, there */
/* can be no such task. */
#endif /* #ifdef CONFIG_RCU_BOOST */
u8 gpnum; /* Current grace period. */
u8 gpcpu; /* Last grace period blocked by the CPU. */
u8 completed; /* Last grace period completed. */
/* If all three are equal, RCU is idle. */
#ifdef CONFIG_RCU_BOOST
s8 boosted_this_gp; /* Has boosting already happened? */
unsigned long boost_time; /* When to start boosting (jiffies) */
#endif /* #ifdef CONFIG_RCU_BOOST */
#ifdef CONFIG_RCU_TRACE
unsigned long n_grace_periods;
#ifdef CONFIG_RCU_BOOST
unsigned long n_tasks_boosted;
unsigned long n_exp_boosts;
unsigned long n_normal_boosts;
unsigned long n_normal_balk_blkd_tasks;
unsigned long n_normal_balk_gp_tasks;
unsigned long n_normal_balk_boost_tasks;
unsigned long n_normal_balk_boosted;
unsigned long n_normal_balk_notyet;
unsigned long n_normal_balk_nos;
unsigned long n_exp_balk_blkd_tasks;
unsigned long n_exp_balk_nos;
#endif /* #ifdef CONFIG_RCU_BOOST */
#endif /* #ifdef CONFIG_RCU_TRACE */
};
static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = {
@@ -121,6 +183,210 @@ static int rcu_preempt_gp_in_progress(void)
return rcu_preempt_ctrlblk.completed != rcu_preempt_ctrlblk.gpnum;
}
/*
 * Return the ->blkd_tasks list entry following that of task t, or NULL
 * if t's successor is the list header itself (that is, t is last).
 */
static struct list_head *rcu_next_node_entry(struct task_struct *t)
{
	struct list_head *successor = t->rcu_node_entry.next;

	return successor == &rcu_preempt_ctrlblk.blkd_tasks ? NULL : successor;
}
#ifdef CONFIG_RCU_TRACE
#ifdef CONFIG_RCU_BOOST
static void rcu_initiate_boost_trace(void);
static void rcu_initiate_exp_boost_trace(void);
#endif /* #ifdef CONFIG_RCU_BOOST */
/*
* Dump additional statistice for TINY_PREEMPT_RCU.
*/
static void show_tiny_preempt_stats(struct seq_file *m)
{
seq_printf(m, "rcu_preempt: qlen=%ld gp=%lu g%u/p%u/c%u tasks=%c%c%c\n",
rcu_preempt_ctrlblk.rcb.qlen,
rcu_preempt_ctrlblk.n_grace_periods,
rcu_preempt_ctrlblk.gpnum,
rcu_preempt_ctrlblk.gpcpu,
rcu_preempt_ctrlblk.completed,
"T."[list_empty(&rcu_preempt_ctrlblk.blkd_tasks)],
"N."[!rcu_preempt_ctrlblk.gp_tasks],
"E."[!rcu_preempt_ctrlblk.exp_tasks]);
#ifdef CONFIG_RCU_BOOST
seq_printf(m, " ttb=%c btg=",
"B."[!rcu_preempt_ctrlblk.boost_tasks]);
switch (rcu_preempt_ctrlblk.boosted_this_gp) {
case -1:
seq_puts(m, "exp");
break;
case 0:
seq_puts(m, "no");
break;
case 1:
seq_puts(m, "begun");
break;
case 2:
seq_puts(m, "done");
break;
default:
seq_printf(m, "?%d?", rcu_preempt_ctrlblk.boosted_this_gp);
}
seq_printf(m, " ntb=%lu neb=%lu nnb=%lu j=%04x bt=%04x\n",
rcu_preempt_ctrlblk.n_tasks_boosted,
rcu_preempt_ctrlblk.n_exp_boosts,
rcu_preempt_ctrlblk.n_normal_boosts,
(int)(jiffies & 0xffff),
(int)(rcu_preempt_ctrlblk.boost_time & 0xffff));
seq_printf(m, " %s: nt=%lu gt=%lu bt=%lu b=%lu ny=%lu nos=%lu\n",
"normal balk",
rcu_preempt_ctrlblk.n_normal_balk_blkd_tasks,
rcu_preempt_ctrlblk.n_normal_balk_gp_tasks,
rcu_preempt_ctrlblk.n_normal_balk_boost_tasks,
rcu_preempt_ctrlblk.n_normal_balk_boosted,
rcu_preempt_ctrlblk.n_normal_balk_notyet,
rcu_preempt_ctrlblk.n_normal_balk_nos);
seq_printf(m, " exp balk: bt=%lu nos=%lu\n",
rcu_preempt_ctrlblk.n_exp_balk_blkd_tasks,
rcu_preempt_ctrlblk.n_exp_balk_nos);
#endif /* #ifdef CONFIG_RCU_BOOST */
}
#endif /* #ifdef CONFIG_RCU_TRACE */
#ifdef CONFIG_RCU_BOOST
#include "rtmutex_common.h"
/*
 * Carry out RCU priority boosting on the task indicated by ->boost_tasks.
 *
 * An on-stack rt_mutex is set up as if already held by that task, and
 * this function then blocks acquiring it.  The task releases the mutex
 * from rcu_read_unlock_special() when it sees RCU_READ_UNLOCK_BOOSTED.
 *
 * Note that this function does not itself advance ->boost_tasks: that
 * pointer is moved to the next ->blkd_tasks entry by
 * rcu_read_unlock_special() when the boosted task removes itself from
 * the list.
 *
 * Returns nonzero if ->boost_tasks is still non-NULL afterwards, in
 * other words if there is more boosting to be done, and zero otherwise.
 */
static int rcu_boost(void)
{
	unsigned long flags;
	struct rt_mutex mtx;
	struct task_struct *t;

	if (rcu_preempt_ctrlblk.boost_tasks == NULL)
		return 0;  /* Nothing to boost. */

	raw_local_irq_save(flags);

	/*
	 * Recheck with irqs disabled: the task(s) in need of boosting
	 * might have left their RCU read-side critical sections, thereby
	 * NULLing ->boost_tasks, if we were preempted just before
	 * disabling irqs.  Without this, container_of() below would be
	 * applied to a NULL pointer.
	 */
	if (rcu_preempt_ctrlblk.boost_tasks == NULL) {
		raw_local_irq_restore(flags);
		return 0;
	}

	rcu_preempt_ctrlblk.boosted_this_gp++;
	t = container_of(rcu_preempt_ctrlblk.boost_tasks, struct task_struct,
			 rcu_node_entry);
	rt_mutex_init_proxy_locked(&mtx, t);
	t->rcu_boost_mutex = &mtx;
	t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED;
	raw_local_irq_restore(flags);

	/* Block until the boosted task releases the mutex. */
	rt_mutex_lock(&mtx);
	RCU_TRACE(rcu_preempt_ctrlblk.n_tasks_boosted++);
	rcu_preempt_ctrlblk.boosted_this_gp++;
	rt_mutex_unlock(&mtx);
	return rcu_preempt_ctrlblk.boost_tasks != NULL;
}
/*
 * Decide whether it is time to start boosting the RCU readers that are
 * blocking the current grace period and, if it is, hand the work to the
 * rcu_kthread_task.  Boosting is started only when there are ->gp_tasks,
 * no boost (normal or expedited) is already under way or done for this
 * grace period, and ->boost_time has been reached.
 *
 * Returns 0 if no blocked readers are holding up the current grace
 * period, otherwise 1.  The return value says nothing about whether
 * boosting was actually initiated.
 */
static int rcu_initiate_boost(void)
{
	/* Nobody is blocking the current grace period. */
	if (!rcu_preempt_blocked_readers_cgp()) {
		RCU_TRACE(rcu_preempt_ctrlblk.n_normal_balk_blkd_tasks++);
		return 0;
	}

	/* Balk (recording why, if tracing) unless every precondition holds. */
	if (rcu_preempt_ctrlblk.gp_tasks == NULL ||
	    rcu_preempt_ctrlblk.boost_tasks != NULL ||
	    rcu_preempt_ctrlblk.boosted_this_gp != 0 ||
	    !ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time)) {
		RCU_TRACE(rcu_initiate_boost_trace());
		return 1;
	}

	/* Start boosting at the first task blocking the current GP. */
	rcu_preempt_ctrlblk.boost_tasks = rcu_preempt_ctrlblk.gp_tasks;
	invoke_rcu_kthread();
	RCU_TRACE(rcu_preempt_ctrlblk.n_normal_boosts++);
	return 1;
}
/*
 * Initiate boosting for an expedited grace period: if any task is on
 * ->blkd_tasks, point ->boost_tasks at the head of that list, mark the
 * boost state as expedited (-1), and wake the kthread.  Runs with
 * interrupts disabled throughout.
 */
static void rcu_initiate_expedited_boost(void)
{
	unsigned long flags;

	raw_local_irq_save(flags);
	if (list_empty(&rcu_preempt_ctrlblk.blkd_tasks)) {
		/* Nothing to boost; just account for the balk. */
		RCU_TRACE(rcu_initiate_exp_boost_trace());
	} else {
		rcu_preempt_ctrlblk.boost_tasks =
			rcu_preempt_ctrlblk.blkd_tasks.next;
		rcu_preempt_ctrlblk.boosted_this_gp = -1;
		invoke_rcu_kthread();
		RCU_TRACE(rcu_preempt_ctrlblk.n_exp_boosts++);
	}
	raw_local_irq_restore(flags);
}
/*
 * CONFIG_RCU_BOOST_DELAY scaled by HZ/1000 and rounded up, giving the
 * delay in jiffies that is added to the grace-period start time to form
 * ->boost_time.  No trailing semicolon, so that the macro can be used
 * inside larger expressions.
 */
#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
/*
* Do priority-boost accounting for the start of a new grace period.
*
* Records the earliest time (in jiffies) at which rcu_initiate_boost()
* may start boosting, and resets the per-grace-period boost state.
*/
static void rcu_preempt_boost_start_gp(void)
{
rcu_preempt_ctrlblk.boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
/* Only positive states are cleared; a negative (expedited, -1) state is left as is. */
if (rcu_preempt_ctrlblk.boosted_this_gp > 0)
rcu_preempt_ctrlblk.boosted_this_gp = 0;
}
#else /* #ifdef CONFIG_RCU_BOOST */
/*
* If there is no RCU priority boosting, we don't boost.
* Always returns 0, so rcu_kthread() never sees leftover boost work.
*/
static int rcu_boost(void)
{
return 0;
}
/*
* If there is no RCU priority boosting, we don't initiate boosting,
* but we do indicate whether there are blocked readers blocking the
* current grace period.
*
* Returns the result of rcu_preempt_blocked_readers_cgp() unchanged.
*/
static int rcu_initiate_boost(void)
{
return rcu_preempt_blocked_readers_cgp();
}
/*
* If there is no RCU priority boosting, we don't initiate expedited boosting.
* Intentionally empty.
*/
static void rcu_initiate_expedited_boost(void)
{
}
/*
* If there is no RCU priority boosting, nothing to do at grace-period start.
* Intentionally empty.
*/
static void rcu_preempt_boost_start_gp(void)
{
}
#endif /* else #ifdef CONFIG_RCU_BOOST */
/*
* Record a preemptible-RCU quiescent state for the specified CPU. Note
* that this just means that the task currently running on the CPU is
@@ -148,11 +414,14 @@ static void rcu_preempt_cpu_qs(void)
rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum;
current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
/* If there is no GP then there is nothing more to do. */
if (!rcu_preempt_gp_in_progress())
return;
/*
* If there is no GP, or if blocked readers are still blocking GP,
* then there is nothing more to do.
* Check up on boosting. If there are no readers blocking the
* current grace period, leave.
*/
if (!rcu_preempt_gp_in_progress() || rcu_preempt_blocked_readers_cgp())
if (rcu_initiate_boost())
return;
/* Advance callbacks. */
@@ -164,9 +433,9 @@ static void rcu_preempt_cpu_qs(void)
if (!rcu_preempt_blocked_readers_any())
rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.nexttail;
/* If there are done callbacks, make RCU_SOFTIRQ process them. */
/* If there are done callbacks, cause them to be invoked. */
if (*rcu_preempt_ctrlblk.rcb.donetail != NULL)
raise_softirq(RCU_SOFTIRQ);
invoke_rcu_kthread();
}
/*
@@ -178,12 +447,16 @@ static void rcu_preempt_start_gp(void)
/* Official start of GP. */
rcu_preempt_ctrlblk.gpnum++;
RCU_TRACE(rcu_preempt_ctrlblk.n_grace_periods++);
/* Any blocked RCU readers block new GP. */
if (rcu_preempt_blocked_readers_any())
rcu_preempt_ctrlblk.gp_tasks =
rcu_preempt_ctrlblk.blkd_tasks.next;
/* Set up for RCU priority boosting. */
rcu_preempt_boost_start_gp();
/* If there is no running reader, CPU is done with GP. */
if (!rcu_preempt_running_reader())
rcu_preempt_cpu_qs();
@@ -304,14 +577,16 @@ static void rcu_read_unlock_special(struct task_struct *t)
*/
empty = !rcu_preempt_blocked_readers_cgp();
empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL;
np = t->rcu_node_entry.next;
if (np == &rcu_preempt_ctrlblk.blkd_tasks)
np = NULL;
np = rcu_next_node_entry(t);
list_del(&t->rcu_node_entry);
if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks)
rcu_preempt_ctrlblk.gp_tasks = np;
if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks)
rcu_preempt_ctrlblk.exp_tasks = np;
#ifdef CONFIG_RCU_BOOST
if (&t->rcu_node_entry == rcu_preempt_ctrlblk.boost_tasks)
rcu_preempt_ctrlblk.boost_tasks = np;
#endif /* #ifdef CONFIG_RCU_BOOST */
INIT_LIST_HEAD(&t->rcu_node_entry);
/*
@@ -331,6 +606,14 @@ static void rcu_read_unlock_special(struct task_struct *t)
if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL)
rcu_report_exp_done();
}
#ifdef CONFIG_RCU_BOOST
/* Unboost self if was boosted. */
if (special & RCU_READ_UNLOCK_BOOSTED) {
t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED;
rt_mutex_unlock(t->rcu_boost_mutex);
t->rcu_boost_mutex = NULL;
}
#endif /* #ifdef CONFIG_RCU_BOOST */
local_irq_restore(flags);
}
@@ -374,7 +657,7 @@ static void rcu_preempt_check_callbacks(void)
rcu_preempt_cpu_qs();
if (&rcu_preempt_ctrlblk.rcb.rcucblist !=
rcu_preempt_ctrlblk.rcb.donetail)
raise_softirq(RCU_SOFTIRQ);
invoke_rcu_kthread();
if (rcu_preempt_gp_in_progress() &&
rcu_cpu_blocking_cur_gp() &&
rcu_preempt_running_reader())
@@ -383,7 +666,7 @@ static void rcu_preempt_check_callbacks(void)
/*
* TINY_PREEMPT_RCU has an extra callback-list tail pointer to
* update, so this is invoked from __rcu_process_callbacks() to
* update, so this is invoked from rcu_process_callbacks() to
* handle that case. Of course, it is invoked for all flavors of
* RCU, but RCU callbacks can appear only on one of the lists, and
* neither ->nexttail nor ->donetail can possibly be NULL, so there
@@ -400,7 +683,7 @@ static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp)
*/
static void rcu_preempt_process_callbacks(void)
{
__rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb);
rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb);
}
/*
@@ -417,6 +700,7 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
local_irq_save(flags);
*rcu_preempt_ctrlblk.nexttail = head;
rcu_preempt_ctrlblk.nexttail = &head->next;
RCU_TRACE(rcu_preempt_ctrlblk.rcb.qlen++);
rcu_preempt_start_gp(); /* checks to see if GP needed. */
local_irq_restore(flags);
}
@@ -532,6 +816,7 @@ void synchronize_rcu_expedited(void)
/* Wait for tail of ->blkd_tasks list to drain. */
if (rcu_preempted_readers_exp())
rcu_initiate_expedited_boost();
wait_event(sync_rcu_preempt_exp_wq,
!rcu_preempted_readers_exp());
@@ -572,6 +857,27 @@ void exit_rcu(void)
#else /* #ifdef CONFIG_TINY_PREEMPT_RCU */
#ifdef CONFIG_RCU_TRACE
/*
* Because preemptible RCU does not exist, it is not necessary to
* dump out its statistics.  Intentionally empty; show_tiny_stats()
* calls this unconditionally.
*/
static void show_tiny_preempt_stats(struct seq_file *m)
{
}
#endif /* #ifdef CONFIG_RCU_TRACE */
/*
* Because preemptible RCU does not exist, it is never necessary to
* boost preempted RCU readers.  Always returns 0 (no more work).
*/
static int rcu_boost(void)
{
return 0;
}
/*
* Because preemptible RCU does not exist, it never has any callbacks
* to check.
@@ -599,17 +905,116 @@ static void rcu_preempt_process_callbacks(void)
#endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
#include <linux/kernel_stat.h>
/*
* During boot, we forgive RCU lockdep issues. After this function is
* invoked, we start taking RCU lockdep issues seriously.
*/
void rcu_scheduler_starting(void)
void __init rcu_scheduler_starting(void)
{
WARN_ON(nr_context_switches() > 0);
rcu_scheduler_active = 1;
}
#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
#ifdef CONFIG_RCU_BOOST
#define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO
#else /* #ifdef CONFIG_RCU_BOOST */
#define RCU_BOOST_PRIO 1
#endif /* #else #ifdef CONFIG_RCU_BOOST */
#ifdef CONFIG_RCU_TRACE
#ifdef CONFIG_RCU_BOOST
/*
 * Account for rcu_initiate_boost() declining to start a normal boost:
 * bump the counter for the first reason found, checking in the same
 * order as the conditions in rcu_initiate_boost().  The final counter
 * (->n_normal_balk_nos) catches the case where no reason applies.
 */
static void rcu_initiate_boost_trace(void)
{
	unsigned long *balk_counter;

	if (rcu_preempt_ctrlblk.gp_tasks == NULL)
		balk_counter = &rcu_preempt_ctrlblk.n_normal_balk_gp_tasks;
	else if (rcu_preempt_ctrlblk.boost_tasks != NULL)
		balk_counter = &rcu_preempt_ctrlblk.n_normal_balk_boost_tasks;
	else if (rcu_preempt_ctrlblk.boosted_this_gp != 0)
		balk_counter = &rcu_preempt_ctrlblk.n_normal_balk_boosted;
	else if (!ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time))
		balk_counter = &rcu_preempt_ctrlblk.n_normal_balk_notyet;
	else
		balk_counter = &rcu_preempt_ctrlblk.n_normal_balk_nos;

	(*balk_counter)++;
}
/*
 * Account for rcu_initiate_expedited_boost() declining to boost:
 * count an empty ->blkd_tasks list separately from any other cause.
 */
static void rcu_initiate_exp_boost_trace(void)
{
	unsigned long *balk_counter =
		list_empty(&rcu_preempt_ctrlblk.blkd_tasks)
			? &rcu_preempt_ctrlblk.n_exp_balk_blkd_tasks
			: &rcu_preempt_ctrlblk.n_exp_balk_nos;

	(*balk_counter)++;
}
#endif /* #ifdef CONFIG_RCU_BOOST */
/*
* Subtract n from the traced callback count of the given control block.
* Called after a batch of callbacks has been invoked, with n being the
* number invoked.  The update is done with interrupts disabled.
*/
static void rcu_trace_sub_qlen(struct rcu_ctrlblk *rcp, int n)
{
unsigned long flags;
raw_local_irq_save(flags);
rcp->qlen -= n;
raw_local_irq_restore(flags);
}
/*
* Dump statistics for TINY_RCU, such as they are.
*
* seq_file "show" routine: prints the preemptible-RCU statistics (if
* any) followed by the queue lengths of the rcu_sched and rcu_bh
* control blocks.  Always returns 0.
*/
static int show_tiny_stats(struct seq_file *m, void *unused)
{
show_tiny_preempt_stats(m);
seq_printf(m, "rcu_sched: qlen: %ld\n", rcu_sched_ctrlblk.qlen);
seq_printf(m, "rcu_bh: qlen: %ld\n", rcu_bh_ctrlblk.qlen);
return 0;
}
/*
* Open routine for the statistics file: binds show_tiny_stats() to the
* file via single_open(), with no private data.
*/
static int show_tiny_stats_open(struct inode *inode, struct file *file)
{
return single_open(file, show_tiny_stats, NULL);
}
/* File operations for the statistics file; reads go through seq_file. */
static const struct file_operations show_tiny_stats_fops = {
.owner = THIS_MODULE,
.open = show_tiny_stats_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static struct dentry *rcudir;
/*
 * Create the debugfs directory "rcu" and, inside it, the read-only
 * (0444) file "rcudata" backed by show_tiny_stats_fops.
 *
 * Returns 0 on success.  If either object cannot be created, whatever
 * was created is removed and -ENOMEM is returned, following the usual
 * 0/-errno convention for init functions (the previous positive return
 * value of 1 did not follow that convention).
 */
static int __init rcutiny_trace_init(void)
{
	struct dentry *retval;

	rcudir = debugfs_create_dir("rcu", NULL);
	if (!rcudir)
		goto free_out;
	retval = debugfs_create_file("rcudata", 0444, rcudir,
				     NULL, &show_tiny_stats_fops);
	if (!retval)
		goto free_out;
	return 0;
free_out:
	/* rcudir may be NULL here if the directory itself was not created. */
	debugfs_remove_recursive(rcudir);
	return -ENOMEM;
}
/* Remove the "rcu" debugfs directory and everything beneath it. */
static void __exit rcutiny_trace_cleanup(void)
{
debugfs_remove_recursive(rcudir);
}
module_init(rcutiny_trace_init);
module_exit(rcutiny_trace_cleanup);
MODULE_AUTHOR("Paul E. McKenney");
MODULE_DESCRIPTION("Read-Copy Update tracing for tiny implementation");
MODULE_LICENSE("GPL");
#endif /* #ifdef CONFIG_RCU_TRACE */

View File

@@ -47,6 +47,7 @@
#include <linux/srcu.h>
#include <linux/slab.h>
#include <asm/byteorder.h>
#include <linux/sched.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and "
@@ -64,6 +65,9 @@ static int irqreader = 1; /* RCU readers from irq (timers). */
static int fqs_duration = 0; /* Duration of bursts (us), 0 to disable. */
static int fqs_holdoff = 0; /* Hold time within burst (us). */
static int fqs_stutter = 3; /* Wait time between bursts (s). */
static int test_boost = 1; /* Test RCU prio boost: 0=no, 1=maybe, 2=yes. */
static int test_boost_interval = 7; /* Interval between boost tests, seconds. */
static int test_boost_duration = 4; /* Duration of each boost test, seconds. */
static char *torture_type = "rcu"; /* What RCU implementation to torture. */
module_param(nreaders, int, 0444);
@@ -88,6 +92,12 @@ module_param(fqs_holdoff, int, 0444);
MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)");
module_param(fqs_stutter, int, 0444);
MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)");
module_param(test_boost, int, 0444);
MODULE_PARM_DESC(test_boost, "Test RCU prio boost: 0=no, 1=maybe, 2=yes.");
module_param(test_boost_interval, int, 0444);
MODULE_PARM_DESC(test_boost_interval, "Interval between boost tests, seconds.");
module_param(test_boost_duration, int, 0444);
MODULE_PARM_DESC(test_boost_duration, "Duration of each boost test, seconds.");
module_param(torture_type, charp, 0444);
MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)");
@@ -109,6 +119,7 @@ static struct task_struct *stats_task;
static struct task_struct *shuffler_task;
static struct task_struct *stutter_task;
static struct task_struct *fqs_task;
static struct task_struct *boost_tasks[NR_CPUS];
#define RCU_TORTURE_PIPE_LEN 10
@@ -134,6 +145,12 @@ static atomic_t n_rcu_torture_alloc_fail;
static atomic_t n_rcu_torture_free;
static atomic_t n_rcu_torture_mberror;
static atomic_t n_rcu_torture_error;
static long n_rcu_torture_boost_ktrerror;
static long n_rcu_torture_boost_rterror;
static long n_rcu_torture_boost_allocerror;
static long n_rcu_torture_boost_afferror;
static long n_rcu_torture_boost_failure;
static long n_rcu_torture_boosts;
static long n_rcu_torture_timers;
static struct list_head rcu_torture_removed;
static cpumask_var_t shuffle_tmp_mask;
@@ -147,6 +164,16 @@ static int stutter_pause_test;
#endif
int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT;
#ifdef CONFIG_RCU_BOOST
#define rcu_can_boost() 1
#else /* #ifdef CONFIG_RCU_BOOST */
#define rcu_can_boost() 0
#endif /* #else #ifdef CONFIG_RCU_BOOST */
static unsigned long boost_starttime; /* jiffies of next boost test start. */
DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */
/* and boost task create/destroy. */
/* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */
#define FULLSTOP_DONTSTOP 0 /* Normal operation. */
@@ -277,6 +304,7 @@ struct rcu_torture_ops {
void (*fqs)(void);
int (*stats)(char *page);
int irq_capable;
int can_boost;
char *name;
};
@@ -366,6 +394,7 @@ static struct rcu_torture_ops rcu_ops = {
.fqs = rcu_force_quiescent_state,
.stats = NULL,
.irq_capable = 1,
.can_boost = rcu_can_boost(),
.name = "rcu"
};
@@ -408,6 +437,7 @@ static struct rcu_torture_ops rcu_sync_ops = {
.fqs = rcu_force_quiescent_state,
.stats = NULL,
.irq_capable = 1,
.can_boost = rcu_can_boost(),
.name = "rcu_sync"
};
@@ -424,6 +454,7 @@ static struct rcu_torture_ops rcu_expedited_ops = {
.fqs = rcu_force_quiescent_state,
.stats = NULL,
.irq_capable = 1,
.can_boost = rcu_can_boost(),
.name = "rcu_expedited"
};
@@ -683,6 +714,110 @@ static struct rcu_torture_ops sched_expedited_ops = {
.name = "sched_expedited"
};
/*
* RCU torture priority-boost testing. Runs one real-time thread per
* CPU for moderate bursts, repeatedly registering RCU callbacks and
* spinning waiting for them to be invoked. If a given callback takes
* too long to be invoked, we assume that priority inversion has occurred.
*/
struct rcu_boost_inflight {
struct rcu_head rcu; /* Callback header handed to call_rcu(). */
int inflight; /* 1 from just before call_rcu() until the callback runs, else 0. */
};
/*
* RCU callback for the boost test: recover the enclosing
* rcu_boost_inflight structure from its rcu_head and clear ->inflight,
* which tells rcu_torture_boost() that it may post another callback
* (or, at exit, that its stack frame is no longer referenced).
*/
static void rcu_torture_boost_cb(struct rcu_head *head)
{
struct rcu_boost_inflight *rbip =
container_of(head, struct rcu_boost_inflight, rcu);
smp_mb(); /* Ensure RCU-core accesses precede clearing ->inflight */
rbip->inflight = 0;
}
/*
 * Body of the per-CPU boost-test kthread.
 *
 * Runs at SCHED_FIFO priority 1.  Each cycle waits until the shared
 * boost_starttime is reached, then for test_boost_duration seconds
 * keeps exactly one RCU callback in flight, counting a boost failure
 * whenever a callback took longer than (test_boost_duration - 1/2)
 * seconds to be invoked.  Afterwards one of the boost threads schedules
 * the next interval under boost_mutex.
 *
 * The callback's rcu_head lives in this function's stack frame, so it
 * is registered with init_rcu_head_on_stack() before first use and
 * released with destroy_rcu_head_on_stack() once no callback can still
 * reference it.  The function does not return until ->inflight is
 * clear for the same reason.
 *
 * @arg is unused.  Always returns 0.
 */
static int rcu_torture_boost(void *arg)
{
	unsigned long call_rcu_time;
	unsigned long endtime;
	unsigned long oldstarttime;
	struct rcu_boost_inflight rbi = { .inflight = 0 };
	struct sched_param sp;

	VERBOSE_PRINTK_STRING("rcu_torture_boost started");

	/* Set real-time priority. */
	sp.sched_priority = 1;
	if (sched_setscheduler(current, SCHED_FIFO, &sp) < 0) {
		VERBOSE_PRINTK_STRING("rcu_torture_boost RT prio failed!");
		n_rcu_torture_boost_rterror++;
	}

	/* The rcu_head is on the stack: say so before handing it to RCU. */
	init_rcu_head_on_stack(&rbi.rcu);

	/* Each pass through the following loop does one boost-test cycle. */
	do {
		/*
		 * Wait for the next test interval.  The unsigned
		 * subtraction exceeds ULONG_MAX / 2 exactly when jiffies
		 * is still before oldstarttime, wraparound included.
		 */
		oldstarttime = boost_starttime;
		while (jiffies - oldstarttime > ULONG_MAX / 2) {
			schedule_timeout_uninterruptible(1);
			rcu_stutter_wait("rcu_torture_boost");
			if (kthread_should_stop() ||
			    fullstop != FULLSTOP_DONTSTOP)
				goto checkwait;
		}

		/* Do one boost-test interval. */
		endtime = oldstarttime + test_boost_duration * HZ;
		call_rcu_time = jiffies;
		while (jiffies - endtime > ULONG_MAX / 2) {
			/* If we don't have a callback in flight, post one. */
			if (!rbi.inflight) {
				smp_mb(); /* RCU core before ->inflight = 1. */
				rbi.inflight = 1;
				call_rcu(&rbi.rcu, rcu_torture_boost_cb);
				if (jiffies - call_rcu_time >
					 test_boost_duration * HZ - HZ / 2) {
					VERBOSE_PRINTK_STRING("rcu_torture_boost boosting failed");
					n_rcu_torture_boost_failure++;
				}
				call_rcu_time = jiffies;
			}
			cond_resched();
			rcu_stutter_wait("rcu_torture_boost");
			if (kthread_should_stop() ||
			    fullstop != FULLSTOP_DONTSTOP)
				goto checkwait;
		}

		/*
		 * Set the start time of the next test interval.
		 * Yes, this is vulnerable to long delays, but such
		 * delays simply cause a false negative for the next
		 * interval.  Besides, we are running at RT priority,
		 * so delays should be relatively rare.
		 */
		while (oldstarttime == boost_starttime) {
			if (mutex_trylock(&boost_mutex)) {
				boost_starttime = jiffies +
						  test_boost_interval * HZ;
				n_rcu_torture_boosts++;
				mutex_unlock(&boost_mutex);
				break;
			}
			schedule_timeout_uninterruptible(1);
		}

		/* Go do the stutter. */
checkwait:	rcu_stutter_wait("rcu_torture_boost");
	} while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);

	/* Clean up and exit. */
	VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping");
	rcutorture_shutdown_absorb("rcu_torture_boost");
	while (!kthread_should_stop() || rbi.inflight)
		schedule_timeout_uninterruptible(1);
	smp_mb(); /* order accesses to ->inflight before stack-frame death. */
	destroy_rcu_head_on_stack(&rbi.rcu);
	return 0;
}
/*
* RCU torture force-quiescent-state kthread. Repeatedly induces
* bursts of calls to force_quiescent_state(), increasing the probability
@@ -933,7 +1068,8 @@ rcu_torture_printk(char *page)
cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG);
cnt += sprintf(&page[cnt],
"rtc: %p ver: %ld tfle: %d rta: %d rtaf: %d rtf: %d "
"rtmbe: %d nt: %ld",
"rtmbe: %d rtbke: %ld rtbre: %ld rtbae: %ld rtbafe: %ld "
"rtbf: %ld rtb: %ld nt: %ld",
rcu_torture_current,
rcu_torture_current_version,
list_empty(&rcu_torture_freelist),
@@ -941,8 +1077,19 @@ rcu_torture_printk(char *page)
atomic_read(&n_rcu_torture_alloc_fail),
atomic_read(&n_rcu_torture_free),
atomic_read(&n_rcu_torture_mberror),
n_rcu_torture_boost_ktrerror,
n_rcu_torture_boost_rterror,
n_rcu_torture_boost_allocerror,
n_rcu_torture_boost_afferror,
n_rcu_torture_boost_failure,
n_rcu_torture_boosts,
n_rcu_torture_timers);
if (atomic_read(&n_rcu_torture_mberror) != 0)
if (atomic_read(&n_rcu_torture_mberror) != 0 ||
n_rcu_torture_boost_ktrerror != 0 ||
n_rcu_torture_boost_rterror != 0 ||
n_rcu_torture_boost_allocerror != 0 ||
n_rcu_torture_boost_afferror != 0 ||
n_rcu_torture_boost_failure != 0)
cnt += sprintf(&page[cnt], " !!!");
cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG);
if (i > 1) {
@@ -1094,22 +1241,91 @@ rcu_torture_stutter(void *arg)
}
static inline void
rcu_torture_print_module_parms(char *tag)
rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, char *tag)
{
printk(KERN_ALERT "%s" TORTURE_FLAG
"--- %s: nreaders=%d nfakewriters=%d "
"stat_interval=%d verbose=%d test_no_idle_hz=%d "
"shuffle_interval=%d stutter=%d irqreader=%d "
"fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d\n",
"fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d "
"test_boost=%d/%d test_boost_interval=%d "
"test_boost_duration=%d\n",
torture_type, tag, nrealreaders, nfakewriters,
stat_interval, verbose, test_no_idle_hz, shuffle_interval,
stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter);
stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter,
test_boost, cur_ops->can_boost,
test_boost_interval, test_boost_duration);
}
static struct notifier_block rcutorture_nb = {
static struct notifier_block rcutorture_shutdown_nb = {
.notifier_call = rcutorture_shutdown_notify,
};
/*
* Stop the boost-test kthread for the given CPU, if there is one.
* The boost_tasks[] slot is cleared under boost_mutex; the kthread is
* then stopped after the mutex has been released.
*/
static void rcutorture_booster_cleanup(int cpu)
{
struct task_struct *t;
/* Unlocked fast-path check: nothing to do if no task was created. */
if (boost_tasks[cpu] == NULL)
return;
mutex_lock(&boost_mutex);
VERBOSE_PRINTK_STRING("Stopping rcu_torture_boost task");
t = boost_tasks[cpu];
boost_tasks[cpu] = NULL;
mutex_unlock(&boost_mutex);
/* This must be outside of the mutex, otherwise deadlock! */
/* NOTE(review): presumably because rcu_torture_boost() itself spins on mutex_trylock(&boost_mutex) -- confirm. */
kthread_stop(t);
}
/*
 * Create the boost-test kthread for the given CPU, bind it to that CPU
 * and start it, all under boost_mutex.  Does nothing if the CPU already
 * has a boost task.
 *
 * Returns 0 on success (or if the task already existed).  If
 * kthread_create() fails, counts the error, leaves boost_tasks[cpu]
 * NULL, and returns the error code from PTR_ERR().
 */
static int rcutorture_booster_init(int cpu)
{
	int retval = 0;

	if (boost_tasks[cpu] != NULL)
		return 0;  /* Already created, nothing more to do. */

	/* Don't allow time recalculation while creating a new task. */
	mutex_lock(&boost_mutex);
	VERBOSE_PRINTK_STRING("Creating rcu_torture_boost task");
	boost_tasks[cpu] = kthread_create(rcu_torture_boost, NULL,
					  "rcu_torture_boost");
	if (IS_ERR(boost_tasks[cpu])) {
		retval = PTR_ERR(boost_tasks[cpu]);
		VERBOSE_PRINTK_STRING("rcu_torture_boost task create failed");
		n_rcu_torture_boost_ktrerror++;
		boost_tasks[cpu] = NULL;
	} else {
		kthread_bind(boost_tasks[cpu], cpu);
		wake_up_process(boost_tasks[cpu]);
	}
	mutex_unlock(&boost_mutex);
	return retval;
}
/*
 * CPU-hotplug notifier for the boost test.  Creates the boost kthread
 * when a CPU comes online or an offline attempt fails, and stops it
 * when a CPU is about to go down.  All other actions are ignored.
 * Always returns NOTIFY_OK; a failure to create the kthread is
 * deliberately not propagated.
 */
static int rcutorture_cpu_notify(struct notifier_block *self,
				 unsigned long action, void *hcpu)
{
	long cpu = (long)hcpu;

	if (action == CPU_ONLINE || action == CPU_DOWN_FAILED)
		(void)rcutorture_booster_init(cpu);
	else if (action == CPU_DOWN_PREPARE)
		rcutorture_booster_cleanup(cpu);

	return NOTIFY_OK;
}
static struct notifier_block rcutorture_cpu_nb = {
.notifier_call = rcutorture_cpu_notify,
};
static void
rcu_torture_cleanup(void)
{
@@ -1127,7 +1343,7 @@ rcu_torture_cleanup(void)
}
fullstop = FULLSTOP_RMMOD;
mutex_unlock(&fullstop_mutex);
unregister_reboot_notifier(&rcutorture_nb);
unregister_reboot_notifier(&rcutorture_shutdown_nb);
if (stutter_task) {
VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task");
kthread_stop(stutter_task);
@@ -1184,6 +1400,12 @@ rcu_torture_cleanup(void)
kthread_stop(fqs_task);
}
fqs_task = NULL;
if ((test_boost == 1 && cur_ops->can_boost) ||
test_boost == 2) {
unregister_cpu_notifier(&rcutorture_cpu_nb);
for_each_possible_cpu(i)
rcutorture_booster_cleanup(i);
}
/* Wait for all RCU callbacks to fire. */
@@ -1195,9 +1417,9 @@ rcu_torture_cleanup(void)
if (cur_ops->cleanup)
cur_ops->cleanup();
if (atomic_read(&n_rcu_torture_error))
rcu_torture_print_module_parms("End of test: FAILURE");
rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE");
else
rcu_torture_print_module_parms("End of test: SUCCESS");
rcu_torture_print_module_parms(cur_ops, "End of test: SUCCESS");
}
static int __init
@@ -1242,7 +1464,7 @@ rcu_torture_init(void)
nrealreaders = nreaders;
else
nrealreaders = 2 * num_online_cpus();
rcu_torture_print_module_parms("Start of test");
rcu_torture_print_module_parms(cur_ops, "Start of test");
fullstop = FULLSTOP_DONTSTOP;
/* Set up the freelist. */
@@ -1263,6 +1485,12 @@ rcu_torture_init(void)
atomic_set(&n_rcu_torture_free, 0);
atomic_set(&n_rcu_torture_mberror, 0);
atomic_set(&n_rcu_torture_error, 0);
n_rcu_torture_boost_ktrerror = 0;
n_rcu_torture_boost_rterror = 0;
n_rcu_torture_boost_allocerror = 0;
n_rcu_torture_boost_afferror = 0;
n_rcu_torture_boost_failure = 0;
n_rcu_torture_boosts = 0;
for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
atomic_set(&rcu_torture_wcount[i], 0);
for_each_possible_cpu(cpu) {
@@ -1376,7 +1604,27 @@ rcu_torture_init(void)
goto unwind;
}
}
register_reboot_notifier(&rcutorture_nb);
if (test_boost_interval < 1)
test_boost_interval = 1;
if (test_boost_duration < 2)
test_boost_duration = 2;
if ((test_boost == 1 && cur_ops->can_boost) ||
test_boost == 2) {
int retval;
boost_starttime = jiffies + test_boost_interval * HZ;
register_cpu_notifier(&rcutorture_cpu_nb);
for_each_possible_cpu(i) {
if (cpu_is_offline(i))
continue; /* Heuristic: CPU can go offline. */
retval = rcutorture_booster_init(i);
if (retval < 0) {
firsterr = retval;
goto unwind;
}
}
}
register_reboot_notifier(&rcutorture_shutdown_nb);
mutex_unlock(&fullstop_mutex);
return 0;

View File

@@ -67,9 +67,6 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
.gpnum = -300, \
.completed = -300, \
.onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname.onofflock), \
.orphan_cbs_list = NULL, \
.orphan_cbs_tail = &structname.orphan_cbs_list, \
.orphan_qlen = 0, \
.fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname.fqslock), \
.n_force_qs = 0, \
.n_force_qs_ngp = 0, \
@@ -620,9 +617,17 @@ static void __init check_cpu_stall_init(void)
static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
{
if (rdp->gpnum != rnp->gpnum) {
rdp->qs_pending = 1;
rdp->passed_quiesc = 0;
/*
* If the current grace period is waiting for this CPU,
* set up to detect a quiescent state, otherwise don't
* go looking for one.
*/
rdp->gpnum = rnp->gpnum;
if (rnp->qsmask & rdp->grpmask) {
rdp->qs_pending = 1;
rdp->passed_quiesc = 0;
} else
rdp->qs_pending = 0;
}
}
@@ -681,6 +686,24 @@ __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat
/* Remember that we saw this grace-period completion. */
rdp->completed = rnp->completed;
/*
* If we were in an extended quiescent state, we may have
* missed some grace periods that other CPUs handled on
* our behalf. Catch up with this state to avoid noting
* spurious new grace periods. If another grace period
* has started, then rnp->gpnum will have advanced, so
* we will detect this later on.
*/
if (ULONG_CMP_LT(rdp->gpnum, rdp->completed))
rdp->gpnum = rdp->completed;
/*
* If RCU does not need a quiescent state from this CPU,
* then make sure that this CPU doesn't go looking for one.
*/
if ((rnp->qsmask & rdp->grpmask) == 0)
rdp->qs_pending = 0;
}
}
@@ -984,53 +1007,31 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
#ifdef CONFIG_HOTPLUG_CPU
/*
* Move a dying CPU's RCU callbacks to the ->orphan_cbs_list for the
* specified flavor of RCU. The callbacks will be adopted by the next
* _rcu_barrier() invocation or by the CPU_DEAD notifier, whichever
* comes first. Because this is invoked from the CPU_DYING notifier,
* irqs are already disabled.
* Move a dying CPU's RCU callbacks to an online CPU's callback list.
* Synchronization is not required because this function executes
* in stop_machine() context.
*/
static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp)
static void rcu_send_cbs_to_online(struct rcu_state *rsp)
{
int i;
/* current DYING CPU is cleared in the cpu_online_mask */
int receive_cpu = cpumask_any(cpu_online_mask);
struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu);
if (rdp->nxtlist == NULL)
return; /* irqs disabled, so comparison is stable. */
raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */
*rsp->orphan_cbs_tail = rdp->nxtlist;
rsp->orphan_cbs_tail = rdp->nxttail[RCU_NEXT_TAIL];
*receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist;
receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
receive_rdp->qlen += rdp->qlen;
receive_rdp->n_cbs_adopted += rdp->qlen;
rdp->n_cbs_orphaned += rdp->qlen;
rdp->nxtlist = NULL;
for (i = 0; i < RCU_NEXT_SIZE; i++)
rdp->nxttail[i] = &rdp->nxtlist;
rsp->orphan_qlen += rdp->qlen;
rdp->n_cbs_orphaned += rdp->qlen;
rdp->qlen = 0;
raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */
}
/*
 * Splice every callback currently on rsp's orphan list onto the tail of
 * the running CPU's callback list, then reset the orphan list to empty.
 * The whole operation runs under rsp->onofflock with irqs disabled.
 */
static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
{
	struct rcu_data *rdp;
	unsigned long flags;

	raw_spin_lock_irqsave(&rsp->onofflock, flags);
	rdp = this_cpu_ptr(rsp->rda);
	if (rsp->orphan_cbs_list != NULL) {
		/* Append the orphan chain and take over its tail pointer. */
		*rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_list;
		rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_tail;

		/* Account for the callbacks just adopted. */
		rdp->qlen += rsp->orphan_qlen;
		rdp->n_cbs_adopted += rsp->orphan_qlen;

		/* Leave the orphan list empty. */
		rsp->orphan_cbs_list = NULL;
		rsp->orphan_cbs_tail = &rsp->orphan_cbs_list;
		rsp->orphan_qlen = 0;
	}
	raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
}
/*
@@ -1081,8 +1082,6 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
raw_spin_unlock_irqrestore(&rnp->lock, flags);
if (need_report & RCU_OFL_TASKS_EXP_GP)
rcu_report_exp_rnp(rsp, rnp);
rcu_adopt_orphan_cbs(rsp);
}
/*
@@ -1100,11 +1099,7 @@ static void rcu_offline_cpu(int cpu)
#else /* #ifdef CONFIG_HOTPLUG_CPU */
static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp)
{
}
static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
static void rcu_send_cbs_to_online(struct rcu_state *rsp)
{
}
@@ -1440,22 +1435,11 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
*/
local_irq_save(flags);
rdp = this_cpu_ptr(rsp->rda);
rcu_process_gp_end(rsp, rdp);
check_for_new_grace_period(rsp, rdp);
/* Add the callback to our list. */
*rdp->nxttail[RCU_NEXT_TAIL] = head;
rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
/* Start a new grace period if one not already started. */
if (!rcu_gp_in_progress(rsp)) {
unsigned long nestflag;
struct rcu_node *rnp_root = rcu_get_root(rsp);
raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock. */
}
/*
* Force the grace period if too many callbacks or too long waiting.
* Enforce hysteresis, and don't invoke force_quiescent_state()
@@ -1464,12 +1448,27 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
* is the only one waiting for a grace period to complete.
*/
if (unlikely(++rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
rdp->blimit = LONG_MAX;
if (rsp->n_force_qs == rdp->n_force_qs_snap &&
*rdp->nxttail[RCU_DONE_TAIL] != head)
force_quiescent_state(rsp, 0);
rdp->n_force_qs_snap = rsp->n_force_qs;
rdp->qlen_last_fqs_check = rdp->qlen;
/* Are we ignoring a completed grace period? */
rcu_process_gp_end(rsp, rdp);
check_for_new_grace_period(rsp, rdp);
/* Start a new grace period if one not already started. */
if (!rcu_gp_in_progress(rsp)) {
unsigned long nestflag;
struct rcu_node *rnp_root = rcu_get_root(rsp);
raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
rcu_start_gp(rsp, nestflag); /* rlses rnp_root->lock */
} else {
/* Give the grace period a kick. */
rdp->blimit = LONG_MAX;
if (rsp->n_force_qs == rdp->n_force_qs_snap &&
*rdp->nxttail[RCU_DONE_TAIL] != head)
force_quiescent_state(rsp, 0);
rdp->n_force_qs_snap = rsp->n_force_qs;
rdp->qlen_last_fqs_check = rdp->qlen;
}
} else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies))
force_quiescent_state(rsp, 1);
local_irq_restore(flags);
@@ -1699,13 +1698,12 @@ static void _rcu_barrier(struct rcu_state *rsp,
* decrement rcu_barrier_cpu_count -- otherwise the first CPU
* might complete its grace period before all of the other CPUs
* did their increment, causing this function to return too
* early.
* early. Note that on_each_cpu() disables irqs, which prevents
* any CPUs from coming online or going offline until each online
* CPU has queued its RCU-barrier callback.
*/
atomic_set(&rcu_barrier_cpu_count, 1);
preempt_disable(); /* stop CPU_DYING from filling orphan_cbs_list */
rcu_adopt_orphan_cbs(rsp);
on_each_cpu(rcu_barrier_func, (void *)call_rcu_func, 1);
preempt_enable(); /* CPU_DYING can again fill orphan_cbs_list */
if (atomic_dec_and_test(&rcu_barrier_cpu_count))
complete(&rcu_barrier_completion);
wait_for_completion(&rcu_barrier_completion);
@@ -1831,18 +1829,13 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
case CPU_DYING:
case CPU_DYING_FROZEN:
/*
* preempt_disable() in _rcu_barrier() prevents stop_machine(),
* so when "on_each_cpu(rcu_barrier_func, (void *)type, 1);"
* returns, all online cpus have queued rcu_barrier_func().
* The dying CPU clears its cpu_online_mask bit and
* moves all of its RCU callbacks to ->orphan_cbs_list
* in the context of stop_machine(), so subsequent calls
* to _rcu_barrier() will adopt these callbacks and only
* then queue rcu_barrier_func() on all remaining CPUs.
* The whole machine is "stopped" except this CPU, so we can
* touch any data without introducing corruption. We send the
* dying CPU's callbacks to an arbitrarily chosen online CPU.
*/
rcu_send_cbs_to_orphanage(&rcu_bh_state);
rcu_send_cbs_to_orphanage(&rcu_sched_state);
rcu_preempt_send_cbs_to_orphanage();
rcu_send_cbs_to_online(&rcu_bh_state);
rcu_send_cbs_to_online(&rcu_sched_state);
rcu_preempt_send_cbs_to_online();
break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
@@ -1880,8 +1873,9 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
{
int i;
for (i = NUM_RCU_LVLS - 1; i >= 0; i--)
for (i = NUM_RCU_LVLS - 1; i > 0; i--)
rsp->levelspread[i] = CONFIG_RCU_FANOUT;
rsp->levelspread[0] = RCU_FANOUT_LEAF;
}
#else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
static void __init rcu_init_levelspread(struct rcu_state *rsp)

View File

@@ -31,46 +31,51 @@
/*
* Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT.
* In theory, it should be possible to add more levels straightforwardly.
* In practice, this has not been tested, so there is probably some
* bug somewhere.
* In practice, this did work well going from three levels to four.
* Of course, your mileage may vary.
*/
#define MAX_RCU_LVLS 4
#define RCU_FANOUT (CONFIG_RCU_FANOUT)
#define RCU_FANOUT_SQ (RCU_FANOUT * RCU_FANOUT)
#define RCU_FANOUT_CUBE (RCU_FANOUT_SQ * RCU_FANOUT)
#define RCU_FANOUT_FOURTH (RCU_FANOUT_CUBE * RCU_FANOUT)
#if CONFIG_RCU_FANOUT > 16
#define RCU_FANOUT_LEAF 16
#else /* #if CONFIG_RCU_FANOUT > 16 */
#define RCU_FANOUT_LEAF (CONFIG_RCU_FANOUT)
#endif /* #else #if CONFIG_RCU_FANOUT > 16 */
#define RCU_FANOUT_1 (RCU_FANOUT_LEAF)
#define RCU_FANOUT_2 (RCU_FANOUT_1 * CONFIG_RCU_FANOUT)
#define RCU_FANOUT_3 (RCU_FANOUT_2 * CONFIG_RCU_FANOUT)
#define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT)
#if NR_CPUS <= RCU_FANOUT
#if NR_CPUS <= RCU_FANOUT_1
# define NUM_RCU_LVLS 1
# define NUM_RCU_LVL_0 1
# define NUM_RCU_LVL_1 (NR_CPUS)
# define NUM_RCU_LVL_2 0
# define NUM_RCU_LVL_3 0
# define NUM_RCU_LVL_4 0
#elif NR_CPUS <= RCU_FANOUT_SQ
#elif NR_CPUS <= RCU_FANOUT_2
# define NUM_RCU_LVLS 2
# define NUM_RCU_LVL_0 1
# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT)
# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
# define NUM_RCU_LVL_2 (NR_CPUS)
# define NUM_RCU_LVL_3 0
# define NUM_RCU_LVL_4 0
#elif NR_CPUS <= RCU_FANOUT_CUBE
#elif NR_CPUS <= RCU_FANOUT_3
# define NUM_RCU_LVLS 3
# define NUM_RCU_LVL_0 1
# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_SQ)
# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT)
# define NUM_RCU_LVL_3 NR_CPUS
# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
# define NUM_RCU_LVL_3 (NR_CPUS)
# define NUM_RCU_LVL_4 0
#elif NR_CPUS <= RCU_FANOUT_FOURTH
#elif NR_CPUS <= RCU_FANOUT_4
# define NUM_RCU_LVLS 4
# define NUM_RCU_LVL_0 1
# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_CUBE)
# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_SQ)
# define NUM_RCU_LVL_3 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT)
# define NUM_RCU_LVL_4 NR_CPUS
# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)
# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
# define NUM_RCU_LVL_3 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
# define NUM_RCU_LVL_4 (NR_CPUS)
#else
# error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
#endif /* #if (NR_CPUS) <= RCU_FANOUT */
#endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */
#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4)
#define NUM_RCU_NODES (RCU_SUM - NR_CPUS)
@@ -203,8 +208,8 @@ struct rcu_data {
long qlen_last_fqs_check;
/* qlen at last check for QS forcing */
unsigned long n_cbs_invoked; /* count of RCU cbs invoked. */
unsigned long n_cbs_orphaned; /* RCU cbs sent to orphanage. */
unsigned long n_cbs_adopted; /* RCU cbs adopted from orphanage. */
unsigned long n_cbs_orphaned; /* RCU cbs orphaned by dying CPU */
unsigned long n_cbs_adopted; /* RCU cbs adopted from dying CPU */
unsigned long n_force_qs_snap;
/* did other CPU force QS recently? */
long blimit; /* Upper limit on a processed batch */
@@ -309,15 +314,7 @@ struct rcu_state {
/* End of fields guarded by root rcu_node's lock. */
raw_spinlock_t onofflock; /* exclude on/offline and */
/* starting new GP. Also */
/* protects the following */
/* orphan_cbs fields. */
struct rcu_head *orphan_cbs_list; /* list of rcu_head structs */
/* orphaned by all CPUs in */
/* a given leaf rcu_node */
/* going offline. */
struct rcu_head **orphan_cbs_tail; /* And tail pointer. */
long orphan_qlen; /* Number of orphaned cbs. */
/* starting new GP. */
raw_spinlock_t fqslock; /* Only one task forcing */
/* quiescent states. */
unsigned long jiffies_force_qs; /* Time at which to invoke */
@@ -390,7 +387,7 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp);
static int rcu_preempt_pending(int cpu);
static int rcu_preempt_needs_cpu(int cpu);
static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
static void rcu_preempt_send_cbs_to_orphanage(void);
static void rcu_preempt_send_cbs_to_online(void);
static void __init __rcu_init_preempt(void);
static void rcu_needs_cpu_flush(void);

View File

@@ -25,6 +25,7 @@
*/
#include <linux/delay.h>
#include <linux/stop_machine.h>
/*
* Check the RCU kernel configuration parameters and print informative
@@ -773,11 +774,11 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
}
/*
* Move preemptable RCU's callbacks to ->orphan_cbs_list.
* Move preemptable RCU's callbacks from dying CPU to other online CPU.
*/
static void rcu_preempt_send_cbs_to_orphanage(void)
static void rcu_preempt_send_cbs_to_online(void)
{
rcu_send_cbs_to_orphanage(&rcu_preempt_state);
rcu_send_cbs_to_online(&rcu_preempt_state);
}
/*
@@ -1001,7 +1002,7 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
/*
* Because there is no preemptable RCU, there are no callbacks to move.
*/
static void rcu_preempt_send_cbs_to_orphanage(void)
static void rcu_preempt_send_cbs_to_online(void)
{
}
@@ -1014,6 +1015,132 @@ static void __init __rcu_init_preempt(void)
#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
#ifndef CONFIG_SMP
/*
* Variant built when CONFIG_SMP is not set: the whole operation is a
* single cond_resched() call.
*/
void synchronize_sched_expedited(void)
{
cond_resched();
}
EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
#else /* #ifndef CONFIG_SMP */
/*
* Counters used by synchronize_sched_expedited(): ->started is incremented
* by every caller on entry, and ->done is advanced to the snapshot of a
* caller once its try_stop_cpus() loop has finished.
*/
static atomic_t sync_sched_expedited_started = ATOMIC_INIT(0);
static atomic_t sync_sched_expedited_done = ATOMIC_INIT(0);
/*
* Callback passed to try_stop_cpus() by synchronize_sched_expedited().
* @data is unused. Executes a full memory barrier and always returns 0.
*/
static int synchronize_sched_expedited_cpu_stop(void *data)
{
/*
* There must be a full memory barrier on each affected CPU
* between the time that try_stop_cpus() is called and the
* time that it returns.
*
* In the current initial implementation of cpu_stop, the
* above condition is already met when the control reaches
* this point and the following smp_mb() is not strictly
* necessary. Do smp_mb() anyway for documentation and
* robustness against future implementation changes.
*/
smp_mb(); /* See above comment block. */
return 0;
}
/*
* Wait for an rcu-sched grace period to elapse, but use "big hammer"
* approach to force grace period to end quickly. This consumes
* significant time on all CPUs, and is thus not recommended for
* any sort of common-case code.
*
* Note that it is illegal to call this function while holding any
* lock that is acquired by a CPU-hotplug notifier. Failing to
* observe this restriction will result in deadlock.
*
* This implementation can be thought of as an application of ticket
* locking to RCU, with sync_sched_expedited_started and
* sync_sched_expedited_done taking on the roles of the halves
* of the ticket-lock word. Each task atomically increments
* sync_sched_expedited_started upon entry, snapshotting the old value,
* then attempts to stop all the CPUs. If this succeeds, then each
* CPU will have executed a context switch, resulting in an RCU-sched
* grace period. We are then done, so we use atomic_cmpxchg() to
* update sync_sched_expedited_done to match our snapshot -- but
* only if someone else has not already advanced past our snapshot.
*
* On the other hand, if try_stop_cpus() fails, we check the value
* of sync_sched_expedited_done. If it has advanced past our
* initial snapshot, then someone else must have forced a grace period
* some time after we took our snapshot. In this case, our work is
* done for us, and we can simply return. Otherwise, we try again,
* but keep our initial snapshot for purposes of checking for someone
* doing our work for us.
*
* If we fail too many times in a row, we fall back to synchronize_sched().
*/
void synchronize_sched_expedited(void)
{
/*
* firstsnap: value of ->started obtained on entry, never changed.
* snap: most recent ->started-derived value covered by our attempt.
* s: sampled value of ->done.
* trycount: number of try_stop_cpus() calls that returned -EAGAIN.
*/
int firstsnap, s, snap, trycount = 0;
/* Note that atomic_inc_return() implies full memory barrier. */
firstsnap = snap = atomic_inc_return(&sync_sched_expedited_started);
get_online_cpus();
/*
* Each pass through the following loop attempts to force a
* context switch on each CPU.
*/
while (try_stop_cpus(cpu_online_mask,
synchronize_sched_expedited_cpu_stop,
NULL) == -EAGAIN) {
/* Drop the reference from get_online_cpus() before delaying or returning. */
put_online_cpus();
/* No joy, try again later. Or just synchronize_sched(). */
/*
* The first ten failures back off for trycount * num_online_cpus()
* microseconds; a further failure falls back to synchronize_sched().
*/
if (trycount++ < 10)
udelay(trycount * num_online_cpus());
else {
synchronize_sched();
return;
}
/* Check to see if someone else did our work for us. */
s = atomic_read(&sync_sched_expedited_done);
if (UINT_CMP_GE((unsigned)s, (unsigned)firstsnap)) {
smp_mb(); /* ensure test happens before caller kfree */
return;
}
/*
* Refetching sync_sched_expedited_started allows later
* callers to piggyback on our grace period. We subtract
* 1 to get the same token that the last incrementer got.
* We retry after they started, so our grace period works
* for them, and they started after our first try, so their
* grace period works for us.
*/
get_online_cpus();
snap = atomic_read(&sync_sched_expedited_started) - 1;
smp_mb(); /* ensure read is before try_stop_cpus(). */
}
/*
* Everyone up to our most recent fetch is covered by our grace
* period. Update the counter, but only if our work is still
* relevant -- which it won't be if someone who started later
* than we did beat us to the punch.
*/
/*
* Retry the cmpxchg whenever ->done changed between the read and the
* update; stop as soon as ->done is already at or beyond snap.
*/
do {
s = atomic_read(&sync_sched_expedited_done);
if (UINT_CMP_GE((unsigned)s, (unsigned)snap)) {
smp_mb(); /* ensure test happens before caller kfree */
break;
}
} while (atomic_cmpxchg(&sync_sched_expedited_done, s, snap) != s);
/* Balances the get_online_cpus() still outstanding when the loop exits. */
put_online_cpus();
}
EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
#endif /* #else #ifndef CONFIG_SMP */
#if !defined(CONFIG_RCU_FAST_NO_HZ)
/*

View File

@@ -166,13 +166,13 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
gpnum = rsp->gpnum;
seq_printf(m, "c=%lu g=%lu s=%d jfq=%ld j=%x "
"nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld\n",
"nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu\n",
rsp->completed, gpnum, rsp->signaled,
(long)(rsp->jiffies_force_qs - jiffies),
(int)(jiffies & 0xffff),
rsp->n_force_qs, rsp->n_force_qs_ngp,
rsp->n_force_qs - rsp->n_force_qs_ngp,
rsp->n_force_qs_lh, rsp->orphan_qlen);
rsp->n_force_qs_lh);
for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < NUM_RCU_NODES; rnp++) {
if (rnp->level != level) {
seq_puts(m, "\n");
@@ -300,7 +300,7 @@ static const struct file_operations rcu_pending_fops = {
static struct dentry *rcudir;
static int __init rcuclassic_trace_init(void)
static int __init rcutree_trace_init(void)
{
struct dentry *retval;
@@ -337,14 +337,14 @@ free_out:
return 1;
}
static void __exit rcuclassic_trace_cleanup(void)
static void __exit rcutree_trace_cleanup(void)
{
debugfs_remove_recursive(rcudir);
}
module_init(rcuclassic_trace_init);
module_exit(rcuclassic_trace_cleanup);
module_init(rcutree_trace_init);
module_exit(rcutree_trace_cleanup);
MODULE_AUTHOR("Paul E. McKenney");
MODULE_DESCRIPTION("Read-Copy Update tracing for hierarchical implementation");

View File

@@ -9534,72 +9534,3 @@ struct cgroup_subsys cpuacct_subsys = {
};
#endif /* CONFIG_CGROUP_CPUACCT */
#ifndef CONFIG_SMP
/*
* Variant built when CONFIG_SMP is not set: the whole operation is a
* single barrier() invocation.
*/
void synchronize_sched_expedited(void)
{
barrier();
}
EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
#else /* #ifndef CONFIG_SMP */
/*
* Incremented each time synchronize_sched_expedited() leaves its
* try_stop_cpus() retry loop without having returned early.
*/
static atomic_t synchronize_sched_expedited_count = ATOMIC_INIT(0);
/*
* Callback passed to try_stop_cpus() by synchronize_sched_expedited().
* @data is unused. Executes a full memory barrier and always returns 0.
*/
static int synchronize_sched_expedited_cpu_stop(void *data)
{
/*
* There must be a full memory barrier on each affected CPU
* between the time that try_stop_cpus() is called and the
* time that it returns.
*
* In the current initial implementation of cpu_stop, the
* above condition is already met when the control reaches
* this point and the following smp_mb() is not strictly
* necessary. Do smp_mb() anyway for documentation and
* robustness against future implementation changes.
*/
smp_mb(); /* See above comment block. */
return 0;
}
/*
* Wait for an rcu-sched grace period to elapse, but use "big hammer"
* approach to force grace period to end quickly. This consumes
* significant time on all CPUs, and is thus not recommended for
* any sort of common-case code.
*
* Note that it is illegal to call this function while holding any
* lock that is acquired by a CPU-hotplug notifier. Failing to
* observe this restriction will result in deadlock.
*/
void synchronize_sched_expedited(void)
{
/*
* snap: counter value on entry plus one.
* trycount: number of try_stop_cpus() calls that returned -EAGAIN.
*/
int snap, trycount = 0;
smp_mb(); /* ensure prior mod happens before capturing snap. */
snap = atomic_read(&synchronize_sched_expedited_count) + 1;
get_online_cpus();
/* Keep retrying for as long as try_stop_cpus() reports -EAGAIN. */
while (try_stop_cpus(cpu_online_mask,
synchronize_sched_expedited_cpu_stop,
NULL) == -EAGAIN) {
/* Drop the reference from get_online_cpus() before delaying or returning. */
put_online_cpus();
/*
* The first ten failures back off for trycount * num_online_cpus()
* microseconds; a further failure falls back to synchronize_sched().
*/
if (trycount++ < 10)
udelay(trycount * num_online_cpus());
else {
synchronize_sched();
return;
}
/*
* Return early once the counter has moved past snap, i.e. it has
* been incremented at least twice since the snapshot was taken.
*/
if (atomic_read(&synchronize_sched_expedited_count) - snap > 0) {
smp_mb(); /* ensure test happens before caller kfree */
return;
}
get_online_cpus();
}
atomic_inc(&synchronize_sched_expedited_count);
smp_mb__after_atomic_inc(); /* ensure post-GP actions seen after GP. */
/* Balances the get_online_cpus() still outstanding when the loop exits. */
put_online_cpus();
}
EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
#endif /* #else #ifndef CONFIG_SMP */

View File

@@ -31,6 +31,7 @@
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/delay.h>
#include <linux/srcu.h>
static int init_srcu_struct_fields(struct srcu_struct *sp)
@@ -203,9 +204,14 @@ static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void))
* all srcu_read_lock() calls using the old counters have completed.
* Their corresponding critical sections might well be still
* executing, but the srcu_read_lock() primitives themselves
* will have finished executing.
* will have finished executing. We initially give readers
* an arbitrarily chosen 10 microseconds to get out of their
* SRCU read-side critical sections, then loop waiting 1/HZ
* seconds per iteration.
*/
if (srcu_readers_active_idx(sp, idx))
udelay(CONFIG_SRCU_SYNCHRONIZE_DELAY);
while (srcu_readers_active_idx(sp, idx))
schedule_timeout_interruptible(1);