Merge branches 'doc.2014.07.08a', 'fixes.2014.07.09a', 'maintainers.2014.07.08b', 'nocbs.2014.07.07a' and 'torture.2014.07.07a' into HEAD
doc.2014.07.08a: Documentation updates. fixes.2014.07.09a: Miscellaneous fixes. maintainers.2014.07.08b: Maintainership updates. nocbs.2014.07.07a: Callback-offloading fixes. torture.2014.07.07a: Torture-test updates.
This commit is contained in:
@@ -99,6 +99,10 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head)
|
||||
|
||||
void kfree(const void *);
|
||||
|
||||
/*
|
||||
* Reclaim the specified callback, either by invoking it (non-lazy case)
|
||||
* or freeing it directly (lazy case). Return true if lazy, false otherwise.
|
||||
*/
|
||||
static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head)
|
||||
{
|
||||
unsigned long offset = (unsigned long)head->func;
|
||||
@@ -108,12 +112,12 @@ static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head)
|
||||
RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, offset));
|
||||
kfree((void *)head - offset);
|
||||
rcu_lock_release(&rcu_callback_map);
|
||||
return 1;
|
||||
return true;
|
||||
} else {
|
||||
RCU_TRACE(trace_rcu_invoke_callback(rn, head));
|
||||
head->func(head);
|
||||
rcu_lock_release(&rcu_callback_map);
|
||||
return 0;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -298,9 +298,9 @@ int __srcu_read_lock(struct srcu_struct *sp)
|
||||
|
||||
idx = ACCESS_ONCE(sp->completed) & 0x1;
|
||||
preempt_disable();
|
||||
ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->c[idx]) += 1;
|
||||
__this_cpu_inc(sp->per_cpu_ref->c[idx]);
|
||||
smp_mb(); /* B */ /* Avoid leaking the critical section. */
|
||||
ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->seq[idx]) += 1;
|
||||
__this_cpu_inc(sp->per_cpu_ref->seq[idx]);
|
||||
preempt_enable();
|
||||
return idx;
|
||||
}
|
||||
|
@@ -1013,10 +1013,7 @@ static void record_gp_stall_check_time(struct rcu_state *rsp)
|
||||
}
|
||||
|
||||
/*
|
||||
* Dump stacks of all tasks running on stalled CPUs. This is a fallback
|
||||
* for architectures that do not implement trigger_all_cpu_backtrace().
|
||||
* The NMI-triggered stack traces are more accurate because they are
|
||||
* printed by the target CPU.
|
||||
* Dump stacks of all tasks running on stalled CPUs.
|
||||
*/
|
||||
static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
|
||||
{
|
||||
@@ -1094,7 +1091,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
|
||||
(long)rsp->gpnum, (long)rsp->completed, totqlen);
|
||||
if (ndetected == 0)
|
||||
pr_err("INFO: Stall ended before state dump start\n");
|
||||
else if (!trigger_all_cpu_backtrace())
|
||||
else
|
||||
rcu_dump_cpu_stacks(rsp);
|
||||
|
||||
/* Complain about tasks blocking the grace period. */
|
||||
@@ -1125,8 +1122,7 @@ static void print_cpu_stall(struct rcu_state *rsp)
|
||||
pr_cont(" (t=%lu jiffies g=%ld c=%ld q=%lu)\n",
|
||||
jiffies - rsp->gp_start,
|
||||
(long)rsp->gpnum, (long)rsp->completed, totqlen);
|
||||
if (!trigger_all_cpu_backtrace())
|
||||
dump_stack();
|
||||
rcu_dump_cpu_stacks(rsp);
|
||||
|
||||
raw_spin_lock_irqsave(&rnp->lock, flags);
|
||||
if (ULONG_CMP_GE(jiffies, ACCESS_ONCE(rsp->jiffies_stall)))
|
||||
@@ -1305,10 +1301,16 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
|
||||
* believe that a grace period is in progress, then we must wait
|
||||
* for the one following, which is in "c". Because our request
|
||||
* will be noticed at the end of the current grace period, we don't
|
||||
* need to explicitly start one.
|
||||
* need to explicitly start one. We only do the lockless check
|
||||
* of rnp_root's fields if the current rcu_node structure thinks
|
||||
* there is no grace period in flight, and because we hold rnp->lock,
|
||||
* the only possible change is when rnp_root's two fields are
|
||||
* equal, in which case rnp_root->gpnum might be concurrently
|
||||
* incremented. But that is OK, as it will just result in our
|
||||
* doing some extra useless work.
|
||||
*/
|
||||
if (rnp->gpnum != rnp->completed ||
|
||||
ACCESS_ONCE(rnp->gpnum) != ACCESS_ONCE(rnp->completed)) {
|
||||
ACCESS_ONCE(rnp_root->gpnum) != ACCESS_ONCE(rnp_root->completed)) {
|
||||
rnp->need_future_gp[c & 0x1]++;
|
||||
trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf"));
|
||||
goto out;
|
||||
@@ -1645,11 +1647,6 @@ static int rcu_gp_init(struct rcu_state *rsp)
|
||||
rnp->level, rnp->grplo,
|
||||
rnp->grphi, rnp->qsmask);
|
||||
raw_spin_unlock_irq(&rnp->lock);
|
||||
#ifdef CONFIG_PROVE_RCU_DELAY
|
||||
if ((prandom_u32() % (rcu_num_nodes + 1)) == 0 &&
|
||||
system_state == SYSTEM_RUNNING)
|
||||
udelay(200);
|
||||
#endif /* #ifdef CONFIG_PROVE_RCU_DELAY */
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
@@ -2347,7 +2344,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
|
||||
}
|
||||
smp_mb(); /* List handling before counting for rcu_barrier(). */
|
||||
rdp->qlen_lazy -= count_lazy;
|
||||
ACCESS_ONCE(rdp->qlen) -= count;
|
||||
ACCESS_ONCE(rdp->qlen) = rdp->qlen - count;
|
||||
rdp->n_cbs_invoked += count;
|
||||
|
||||
/* Reinstate batch limit if we have worked down the excess. */
|
||||
@@ -2485,14 +2482,14 @@ static void force_quiescent_state(struct rcu_state *rsp)
|
||||
struct rcu_node *rnp_old = NULL;
|
||||
|
||||
/* Funnel through hierarchy to reduce memory contention. */
|
||||
rnp = per_cpu_ptr(rsp->rda, raw_smp_processor_id())->mynode;
|
||||
rnp = __this_cpu_read(rsp->rda->mynode);
|
||||
for (; rnp != NULL; rnp = rnp->parent) {
|
||||
ret = (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) ||
|
||||
!raw_spin_trylock(&rnp->fqslock);
|
||||
if (rnp_old != NULL)
|
||||
raw_spin_unlock(&rnp_old->fqslock);
|
||||
if (ret) {
|
||||
ACCESS_ONCE(rsp->n_force_qs_lh)++;
|
||||
rsp->n_force_qs_lh++;
|
||||
return;
|
||||
}
|
||||
rnp_old = rnp;
|
||||
@@ -2504,7 +2501,7 @@ static void force_quiescent_state(struct rcu_state *rsp)
|
||||
smp_mb__after_unlock_lock();
|
||||
raw_spin_unlock(&rnp_old->fqslock);
|
||||
if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
|
||||
ACCESS_ONCE(rsp->n_force_qs_lh)++;
|
||||
rsp->n_force_qs_lh++;
|
||||
raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
|
||||
return; /* Someone beat us to it. */
|
||||
}
|
||||
@@ -2662,7 +2659,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
|
||||
unsigned long flags;
|
||||
struct rcu_data *rdp;
|
||||
|
||||
WARN_ON_ONCE((unsigned long)head & 0x3); /* Misaligned rcu_head! */
|
||||
WARN_ON_ONCE((unsigned long)head & 0x1); /* Misaligned rcu_head! */
|
||||
if (debug_rcu_head_queue(head)) {
|
||||
/* Probable double call_rcu(), so leak the callback. */
|
||||
ACCESS_ONCE(head->func) = rcu_leak_callback;
|
||||
@@ -2693,7 +2690,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
|
||||
local_irq_restore(flags);
|
||||
return;
|
||||
}
|
||||
ACCESS_ONCE(rdp->qlen)++;
|
||||
ACCESS_ONCE(rdp->qlen) = rdp->qlen + 1;
|
||||
if (lazy)
|
||||
rdp->qlen_lazy++;
|
||||
else
|
||||
@@ -3257,7 +3254,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
|
||||
* ACCESS_ONCE() to prevent the compiler from speculating
|
||||
* the increment to precede the early-exit check.
|
||||
*/
|
||||
ACCESS_ONCE(rsp->n_barrier_done)++;
|
||||
ACCESS_ONCE(rsp->n_barrier_done) = rsp->n_barrier_done + 1;
|
||||
WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 1);
|
||||
_rcu_barrier_trace(rsp, "Inc1", -1, rsp->n_barrier_done);
|
||||
smp_mb(); /* Order ->n_barrier_done increment with below mechanism. */
|
||||
@@ -3307,7 +3304,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
|
||||
|
||||
/* Increment ->n_barrier_done to prevent duplicate work. */
|
||||
smp_mb(); /* Keep increment after above mechanism. */
|
||||
ACCESS_ONCE(rsp->n_barrier_done)++;
|
||||
ACCESS_ONCE(rsp->n_barrier_done) = rsp->n_barrier_done + 1;
|
||||
WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 0);
|
||||
_rcu_barrier_trace(rsp, "Inc2", -1, rsp->n_barrier_done);
|
||||
smp_mb(); /* Keep increment before caller's subsequent code. */
|
||||
@@ -3564,14 +3561,16 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
|
||||
static void __init rcu_init_one(struct rcu_state *rsp,
|
||||
struct rcu_data __percpu *rda)
|
||||
{
|
||||
static char *buf[] = { "rcu_node_0",
|
||||
"rcu_node_1",
|
||||
"rcu_node_2",
|
||||
"rcu_node_3" }; /* Match MAX_RCU_LVLS */
|
||||
static char *fqs[] = { "rcu_node_fqs_0",
|
||||
"rcu_node_fqs_1",
|
||||
"rcu_node_fqs_2",
|
||||
"rcu_node_fqs_3" }; /* Match MAX_RCU_LVLS */
|
||||
static const char * const buf[] = {
|
||||
"rcu_node_0",
|
||||
"rcu_node_1",
|
||||
"rcu_node_2",
|
||||
"rcu_node_3" }; /* Match MAX_RCU_LVLS */
|
||||
static const char * const fqs[] = {
|
||||
"rcu_node_fqs_0",
|
||||
"rcu_node_fqs_1",
|
||||
"rcu_node_fqs_2",
|
||||
"rcu_node_fqs_3" }; /* Match MAX_RCU_LVLS */
|
||||
static u8 fl_mask = 0x1;
|
||||
int cpustride = 1;
|
||||
int i;
|
||||
|
@@ -172,6 +172,14 @@ struct rcu_node {
|
||||
/* queued on this rcu_node structure that */
|
||||
/* are blocking the current grace period, */
|
||||
/* there can be no such task. */
|
||||
struct completion boost_completion;
|
||||
/* Used to ensure that the rt_mutex used */
|
||||
/* to carry out the boosting is fully */
|
||||
/* released with no future boostee accesses */
|
||||
/* before that rt_mutex is re-initialized. */
|
||||
struct rt_mutex boost_mtx;
|
||||
/* Used only for the priority-boosting */
|
||||
/* side effect, not as a lock. */
|
||||
unsigned long boost_time;
|
||||
/* When to start boosting (jiffies). */
|
||||
struct task_struct *boost_kthread_task;
|
||||
@@ -334,11 +342,29 @@ struct rcu_data {
|
||||
struct rcu_head **nocb_tail;
|
||||
atomic_long_t nocb_q_count; /* # CBs waiting for kthread */
|
||||
atomic_long_t nocb_q_count_lazy; /* (approximate). */
|
||||
struct rcu_head *nocb_follower_head; /* CBs ready to invoke. */
|
||||
struct rcu_head **nocb_follower_tail;
|
||||
atomic_long_t nocb_follower_count; /* # CBs ready to invoke. */
|
||||
atomic_long_t nocb_follower_count_lazy; /* (approximate). */
|
||||
int nocb_p_count; /* # CBs being invoked by kthread */
|
||||
int nocb_p_count_lazy; /* (approximate). */
|
||||
wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */
|
||||
struct task_struct *nocb_kthread;
|
||||
bool nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */
|
||||
|
||||
/* The following fields are used by the leader, hence own cacheline. */
|
||||
struct rcu_head *nocb_gp_head ____cacheline_internodealigned_in_smp;
|
||||
/* CBs waiting for GP. */
|
||||
struct rcu_head **nocb_gp_tail;
|
||||
long nocb_gp_count;
|
||||
long nocb_gp_count_lazy;
|
||||
bool nocb_leader_wake; /* Is the nocb leader thread awake? */
|
||||
struct rcu_data *nocb_next_follower;
|
||||
/* Next follower in wakeup chain. */
|
||||
|
||||
/* The following fields are used by the follower, hence new cacheline. */
|
||||
struct rcu_data *nocb_leader ____cacheline_internodealigned_in_smp;
|
||||
/* Leader CPU takes GP-end wakeups. */
|
||||
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
|
||||
|
||||
/* 8) RCU CPU stall data. */
|
||||
@@ -587,8 +613,14 @@ static bool rcu_nohz_full_cpu(struct rcu_state *rsp);
|
||||
/* Sum up queue lengths for tracing. */
|
||||
static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll)
|
||||
{
|
||||
*ql = atomic_long_read(&rdp->nocb_q_count) + rdp->nocb_p_count;
|
||||
*qll = atomic_long_read(&rdp->nocb_q_count_lazy) + rdp->nocb_p_count_lazy;
|
||||
*ql = atomic_long_read(&rdp->nocb_q_count) +
|
||||
rdp->nocb_p_count +
|
||||
atomic_long_read(&rdp->nocb_follower_count) +
|
||||
rdp->nocb_p_count + rdp->nocb_gp_count;
|
||||
*qll = atomic_long_read(&rdp->nocb_q_count_lazy) +
|
||||
rdp->nocb_p_count_lazy +
|
||||
atomic_long_read(&rdp->nocb_follower_count_lazy) +
|
||||
rdp->nocb_p_count_lazy + rdp->nocb_gp_count_lazy;
|
||||
}
|
||||
#else /* #ifdef CONFIG_RCU_NOCB_CPU */
|
||||
static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll)
|
||||
|
@@ -33,6 +33,7 @@
|
||||
#define RCU_KTHREAD_PRIO 1
|
||||
|
||||
#ifdef CONFIG_RCU_BOOST
|
||||
#include "../locking/rtmutex_common.h"
|
||||
#define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO
|
||||
#else
|
||||
#define RCU_BOOST_PRIO RCU_KTHREAD_PRIO
|
||||
@@ -336,7 +337,7 @@ void rcu_read_unlock_special(struct task_struct *t)
|
||||
unsigned long flags;
|
||||
struct list_head *np;
|
||||
#ifdef CONFIG_RCU_BOOST
|
||||
struct rt_mutex *rbmp = NULL;
|
||||
bool drop_boost_mutex = false;
|
||||
#endif /* #ifdef CONFIG_RCU_BOOST */
|
||||
struct rcu_node *rnp;
|
||||
int special;
|
||||
@@ -398,11 +399,8 @@ void rcu_read_unlock_special(struct task_struct *t)
|
||||
#ifdef CONFIG_RCU_BOOST
|
||||
if (&t->rcu_node_entry == rnp->boost_tasks)
|
||||
rnp->boost_tasks = np;
|
||||
/* Snapshot/clear ->rcu_boost_mutex with rcu_node lock held. */
|
||||
if (t->rcu_boost_mutex) {
|
||||
rbmp = t->rcu_boost_mutex;
|
||||
t->rcu_boost_mutex = NULL;
|
||||
}
|
||||
/* Snapshot ->boost_mtx ownership with rcu_node lock held. */
|
||||
drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t;
|
||||
#endif /* #ifdef CONFIG_RCU_BOOST */
|
||||
|
||||
/*
|
||||
@@ -427,8 +425,10 @@ void rcu_read_unlock_special(struct task_struct *t)
|
||||
|
||||
#ifdef CONFIG_RCU_BOOST
|
||||
/* Unboost if we were boosted. */
|
||||
if (rbmp)
|
||||
rt_mutex_unlock(rbmp);
|
||||
if (drop_boost_mutex) {
|
||||
rt_mutex_unlock(&rnp->boost_mtx);
|
||||
complete(&rnp->boost_completion);
|
||||
}
|
||||
#endif /* #ifdef CONFIG_RCU_BOOST */
|
||||
|
||||
/*
|
||||
@@ -988,6 +988,7 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
|
||||
|
||||
/* Because preemptible RCU does not exist, no quieting of tasks. */
|
||||
static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
|
||||
__releases(rnp->lock)
|
||||
{
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
}
|
||||
@@ -1149,7 +1150,6 @@ static void rcu_wake_cond(struct task_struct *t, int status)
|
||||
static int rcu_boost(struct rcu_node *rnp)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct rt_mutex mtx;
|
||||
struct task_struct *t;
|
||||
struct list_head *tb;
|
||||
|
||||
@@ -1200,11 +1200,15 @@ static int rcu_boost(struct rcu_node *rnp)
|
||||
* section.
|
||||
*/
|
||||
t = container_of(tb, struct task_struct, rcu_node_entry);
|
||||
rt_mutex_init_proxy_locked(&mtx, t);
|
||||
t->rcu_boost_mutex = &mtx;
|
||||
rt_mutex_init_proxy_locked(&rnp->boost_mtx, t);
|
||||
init_completion(&rnp->boost_completion);
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */
|
||||
rt_mutex_unlock(&mtx); /* Keep lockdep happy. */
|
||||
/* Lock only for side effect: boosts task t's priority. */
|
||||
rt_mutex_lock(&rnp->boost_mtx);
|
||||
rt_mutex_unlock(&rnp->boost_mtx); /* Then keep lockdep happy. */
|
||||
|
||||
/* Wait for boostee to be done w/boost_mtx before reinitializing. */
|
||||
wait_for_completion(&rnp->boost_completion);
|
||||
|
||||
return ACCESS_ONCE(rnp->exp_tasks) != NULL ||
|
||||
ACCESS_ONCE(rnp->boost_tasks) != NULL;
|
||||
@@ -1256,6 +1260,7 @@ static int rcu_boost_kthread(void *arg)
|
||||
* about it going away.
|
||||
*/
|
||||
static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
|
||||
__releases(rnp->lock)
|
||||
{
|
||||
struct task_struct *t;
|
||||
|
||||
@@ -1491,6 +1496,7 @@ static void rcu_prepare_kthreads(int cpu)
|
||||
#else /* #ifdef CONFIG_RCU_BOOST */
|
||||
|
||||
static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
|
||||
__releases(rnp->lock)
|
||||
{
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
}
|
||||
@@ -2059,6 +2065,22 @@ bool rcu_is_nocb_cpu(int cpu)
|
||||
}
|
||||
#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */
|
||||
|
||||
/*
|
||||
* Kick the leader kthread for this NOCB group.
|
||||
*/
|
||||
static void wake_nocb_leader(struct rcu_data *rdp, bool force)
{
	struct rcu_data *leader = rdp->nocb_leader;

	/* The leader has no kthread recorded, so there is nobody to wake. */
	if (!ACCESS_ONCE(leader->nocb_kthread))
		return;

	/*
	 * The leader is already marked awake and the caller did not ask
	 * for an unconditional wakeup: nothing more to do.
	 */
	if (ACCESS_ONCE(leader->nocb_leader_wake) && !force)
		return;

	/* Prior xchg orders against prior callback enqueue. */
	ACCESS_ONCE(leader->nocb_leader_wake) = true;
	wake_up(&leader->nocb_wq);
}
|
||||
|
||||
/*
|
||||
* Enqueue the specified string of rcu_head structures onto the specified
|
||||
* CPU's no-CBs lists. The CPU is specified by rdp, the head of the
|
||||
@@ -2093,7 +2115,8 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
|
||||
len = atomic_long_read(&rdp->nocb_q_count);
|
||||
if (old_rhpp == &rdp->nocb_head) {
|
||||
if (!irqs_disabled_flags(flags)) {
|
||||
wake_up(&rdp->nocb_wq); /* ... if queue was empty ... */
|
||||
/* ... if queue was empty ... */
|
||||
wake_nocb_leader(rdp, false);
|
||||
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
|
||||
TPS("WakeEmpty"));
|
||||
} else {
|
||||
@@ -2103,7 +2126,8 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
|
||||
}
|
||||
rdp->qlen_last_fqs_check = 0;
|
||||
} else if (len > rdp->qlen_last_fqs_check + qhimark) {
|
||||
wake_up_process(t); /* ... or if many callbacks queued. */
|
||||
/* ... or if many callbacks queued. */
|
||||
wake_nocb_leader(rdp, true);
|
||||
rdp->qlen_last_fqs_check = LONG_MAX / 2;
|
||||
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeOvf"));
|
||||
} else {
|
||||
@@ -2212,14 +2236,151 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
|
||||
smp_mb(); /* Ensure that CB invocation happens after GP end. */
|
||||
}
|
||||
|
||||
/*
|
||||
* Leaders come here to wait for additional callbacks to show up.
|
||||
* This function does not return until callbacks appear.
|
||||
*/
|
||||
static void nocb_leader_wait(struct rcu_data *my_rdp)
|
||||
{
|
||||
bool firsttime = true;
|
||||
bool gotcbs;
|
||||
struct rcu_data *rdp;
|
||||
struct rcu_head **tail;
|
||||
|
||||
wait_again:
|
||||
|
||||
/* Wait for callbacks to appear. */
|
||||
if (!rcu_nocb_poll) {
|
||||
trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, "Sleep");
|
||||
wait_event_interruptible(my_rdp->nocb_wq,
|
||||
ACCESS_ONCE(my_rdp->nocb_leader_wake));
|
||||
/* Memory barrier handled by smp_mb() calls below and repoll. */
|
||||
} else if (firsttime) {
|
||||
firsttime = false; /* Don't drown trace log with "Poll"! */
|
||||
trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, "Poll");
|
||||
}
|
||||
|
||||
/*
|
||||
* Each pass through the following loop checks a follower for CBs.
|
||||
* We are our own first follower. Any CBs found are moved to
|
||||
* nocb_gp_head, where they await a grace period.
|
||||
*/
|
||||
gotcbs = false;
|
||||
for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) {
|
||||
rdp->nocb_gp_head = ACCESS_ONCE(rdp->nocb_head);
|
||||
if (!rdp->nocb_gp_head)
|
||||
continue; /* No CBs here, try next follower. */
|
||||
|
||||
/* Move callbacks to wait-for-GP list, which is empty. */
|
||||
ACCESS_ONCE(rdp->nocb_head) = NULL;
|
||||
rdp->nocb_gp_tail = xchg(&rdp->nocb_tail, &rdp->nocb_head);
|
||||
rdp->nocb_gp_count = atomic_long_xchg(&rdp->nocb_q_count, 0);
|
||||
rdp->nocb_gp_count_lazy =
|
||||
atomic_long_xchg(&rdp->nocb_q_count_lazy, 0);
|
||||
gotcbs = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* If there were no callbacks, sleep a bit, rescan after a
|
||||
* memory barrier, and go retry.
|
||||
*/
|
||||
if (unlikely(!gotcbs)) {
|
||||
if (!rcu_nocb_poll)
|
||||
trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu,
|
||||
"WokeEmpty");
|
||||
flush_signals(current);
|
||||
schedule_timeout_interruptible(1);
|
||||
|
||||
/* Rescan in case we were a victim of memory ordering. */
|
||||
my_rdp->nocb_leader_wake = false;
|
||||
smp_mb(); /* Ensure _wake false before scan. */
|
||||
for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower)
|
||||
if (ACCESS_ONCE(rdp->nocb_head)) {
|
||||
/* Found CB, so short-circuit next wait. */
|
||||
my_rdp->nocb_leader_wake = true;
|
||||
break;
|
||||
}
|
||||
goto wait_again;
|
||||
}
|
||||
|
||||
/* Wait for one grace period. */
|
||||
rcu_nocb_wait_gp(my_rdp);
|
||||
|
||||
/*
|
||||
* We left ->nocb_leader_wake set to reduce cache thrashing.
|
||||
* We clear it now, but recheck for new callbacks while
|
||||
* traversing our follower list.
|
||||
*/
|
||||
my_rdp->nocb_leader_wake = false;
|
||||
smp_mb(); /* Ensure _wake false before scan of ->nocb_head. */
|
||||
|
||||
/* Each pass through the following loop wakes a follower, if needed. */
|
||||
for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) {
|
||||
if (ACCESS_ONCE(rdp->nocb_head))
|
||||
my_rdp->nocb_leader_wake = true; /* No need to wait. */
|
||||
if (!rdp->nocb_gp_head)
|
||||
continue; /* No CBs, so no need to wake follower. */
|
||||
|
||||
/* Append callbacks to follower's "done" list. */
|
||||
tail = xchg(&rdp->nocb_follower_tail, rdp->nocb_gp_tail);
|
||||
*tail = rdp->nocb_gp_head;
|
||||
atomic_long_add(rdp->nocb_gp_count, &rdp->nocb_follower_count);
|
||||
atomic_long_add(rdp->nocb_gp_count_lazy,
|
||||
&rdp->nocb_follower_count_lazy);
|
||||
if (rdp != my_rdp && tail == &rdp->nocb_follower_head) {
|
||||
/*
|
||||
* List was empty, wake up the follower.
|
||||
* Memory barriers supplied by atomic_long_add().
|
||||
*/
|
||||
wake_up(&rdp->nocb_wq);
|
||||
}
|
||||
}
|
||||
|
||||
/* If we (the leader) don't have CBs, go wait some more. */
|
||||
if (!my_rdp->nocb_follower_head)
|
||||
goto wait_again;
|
||||
}
|
||||
|
||||
/*
|
||||
* Followers come here to wait for additional callbacks to show up.
|
||||
* This function does not return until callbacks appear.
|
||||
*/
|
||||
static void nocb_follower_wait(struct rcu_data *rdp)
|
||||
{
|
||||
bool firsttime = true;
|
||||
|
||||
for (;;) {
|
||||
if (!rcu_nocb_poll) {
|
||||
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
|
||||
"FollowerSleep");
|
||||
wait_event_interruptible(rdp->nocb_wq,
|
||||
ACCESS_ONCE(rdp->nocb_follower_head));
|
||||
} else if (firsttime) {
|
||||
/* Don't drown trace log with "Poll"! */
|
||||
firsttime = false;
|
||||
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, "Poll");
|
||||
}
|
||||
if (smp_load_acquire(&rdp->nocb_follower_head)) {
|
||||
/* ^^^ Ensure CB invocation follows _head test. */
|
||||
return;
|
||||
}
|
||||
if (!rcu_nocb_poll)
|
||||
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
|
||||
"WokeEmpty");
|
||||
flush_signals(current);
|
||||
schedule_timeout_interruptible(1);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Per-rcu_data kthread, but only for no-CBs CPUs. Each kthread invokes
|
||||
* callbacks queued by the corresponding no-CBs CPU.
|
||||
* callbacks queued by the corresponding no-CBs CPU, however, there is
|
||||
* an optional leader-follower relationship so that the grace-period
|
||||
* kthreads don't have to do quite so many wakeups.
|
||||
*/
|
||||
static int rcu_nocb_kthread(void *arg)
|
||||
{
|
||||
int c, cl;
|
||||
bool firsttime = 1;
|
||||
struct rcu_head *list;
|
||||
struct rcu_head *next;
|
||||
struct rcu_head **tail;
|
||||
@@ -2227,41 +2388,22 @@ static int rcu_nocb_kthread(void *arg)
|
||||
|
||||
/* Each pass through this loop invokes one batch of callbacks */
|
||||
for (;;) {
|
||||
/* If not polling, wait for next batch of callbacks. */
|
||||
if (!rcu_nocb_poll) {
|
||||
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
|
||||
TPS("Sleep"));
|
||||
wait_event_interruptible(rdp->nocb_wq, rdp->nocb_head);
|
||||
/* Memory barrier provided by xchg() below. */
|
||||
} else if (firsttime) {
|
||||
firsttime = 0;
|
||||
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
|
||||
TPS("Poll"));
|
||||
}
|
||||
list = ACCESS_ONCE(rdp->nocb_head);
|
||||
if (!list) {
|
||||
if (!rcu_nocb_poll)
|
||||
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
|
||||
TPS("WokeEmpty"));
|
||||
schedule_timeout_interruptible(1);
|
||||
flush_signals(current);
|
||||
continue;
|
||||
}
|
||||
firsttime = 1;
|
||||
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
|
||||
TPS("WokeNonEmpty"));
|
||||
/* Wait for callbacks. */
|
||||
if (rdp->nocb_leader == rdp)
|
||||
nocb_leader_wait(rdp);
|
||||
else
|
||||
nocb_follower_wait(rdp);
|
||||
|
||||
/*
|
||||
* Extract queued callbacks, update counts, and wait
|
||||
* for a grace period to elapse.
|
||||
*/
|
||||
ACCESS_ONCE(rdp->nocb_head) = NULL;
|
||||
tail = xchg(&rdp->nocb_tail, &rdp->nocb_head);
|
||||
c = atomic_long_xchg(&rdp->nocb_q_count, 0);
|
||||
cl = atomic_long_xchg(&rdp->nocb_q_count_lazy, 0);
|
||||
ACCESS_ONCE(rdp->nocb_p_count) += c;
|
||||
ACCESS_ONCE(rdp->nocb_p_count_lazy) += cl;
|
||||
rcu_nocb_wait_gp(rdp);
|
||||
/* Pull the ready-to-invoke callbacks onto local list. */
|
||||
list = ACCESS_ONCE(rdp->nocb_follower_head);
|
||||
BUG_ON(!list);
|
||||
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, "WokeNonEmpty");
|
||||
ACCESS_ONCE(rdp->nocb_follower_head) = NULL;
|
||||
tail = xchg(&rdp->nocb_follower_tail, &rdp->nocb_follower_head);
|
||||
c = atomic_long_xchg(&rdp->nocb_follower_count, 0);
|
||||
cl = atomic_long_xchg(&rdp->nocb_follower_count_lazy, 0);
|
||||
rdp->nocb_p_count += c;
|
||||
rdp->nocb_p_count_lazy += cl;
|
||||
|
||||
/* Each pass through the following loop invokes a callback. */
|
||||
trace_rcu_batch_start(rdp->rsp->name, cl, c, -1);
|
||||
@@ -2305,7 +2447,7 @@ static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
|
||||
if (!rcu_nocb_need_deferred_wakeup(rdp))
|
||||
return;
|
||||
ACCESS_ONCE(rdp->nocb_defer_wakeup) = false;
|
||||
wake_up(&rdp->nocb_wq);
|
||||
wake_nocb_leader(rdp, false);
|
||||
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWakeEmpty"));
|
||||
}
|
||||
|
||||
@@ -2314,19 +2456,56 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
|
||||
{
|
||||
rdp->nocb_tail = &rdp->nocb_head;
|
||||
init_waitqueue_head(&rdp->nocb_wq);
|
||||
rdp->nocb_follower_tail = &rdp->nocb_follower_head;
|
||||
}
|
||||
|
||||
/* Create a kthread for each RCU flavor for each no-CBs CPU. */
|
||||
/* How many follower CPU IDs per leader? Default of -1 for sqrt(nr_cpu_ids). */
|
||||
static int rcu_nocb_leader_stride = -1;
|
||||
module_param(rcu_nocb_leader_stride, int, 0444);
|
||||
|
||||
/*
|
||||
* Create a kthread for each RCU flavor for each no-CBs CPU.
|
||||
* Also initialize leader-follower relationships.
|
||||
*/
|
||||
static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
|
||||
{
|
||||
int cpu;
|
||||
int ls = rcu_nocb_leader_stride;
|
||||
int nl = 0; /* Next leader. */
|
||||
struct rcu_data *rdp;
|
||||
struct rcu_data *rdp_leader = NULL; /* Suppress misguided gcc warn. */
|
||||
struct rcu_data *rdp_prev = NULL;
|
||||
struct task_struct *t;
|
||||
|
||||
if (rcu_nocb_mask == NULL)
|
||||
return;
|
||||
#ifdef CONFIG_NO_HZ_FULL
|
||||
cpumask_or(rcu_nocb_mask, rcu_nocb_mask, tick_nohz_full_mask);
|
||||
#endif /* #ifdef CONFIG_NO_HZ_FULL */
|
||||
if (ls == -1) {
|
||||
ls = int_sqrt(nr_cpu_ids);
|
||||
rcu_nocb_leader_stride = ls;
|
||||
}
|
||||
|
||||
/*
|
||||
* Each pass through this loop sets up one rcu_data structure and
|
||||
* spawns one rcu_nocb_kthread().
|
||||
*/
|
||||
for_each_cpu(cpu, rcu_nocb_mask) {
|
||||
rdp = per_cpu_ptr(rsp->rda, cpu);
|
||||
if (rdp->cpu >= nl) {
|
||||
/* New leader, set up for followers & next leader. */
|
||||
nl = DIV_ROUND_UP(rdp->cpu + 1, ls) * ls;
|
||||
rdp->nocb_leader = rdp;
|
||||
rdp_leader = rdp;
|
||||
} else {
|
||||
/* Another follower, link to previous leader. */
|
||||
rdp->nocb_leader = rdp_leader;
|
||||
rdp_prev->nocb_next_follower = rdp;
|
||||
}
|
||||
rdp_prev = rdp;
|
||||
|
||||
/* Spawn the kthread for this CPU. */
|
||||
t = kthread_run(rcu_nocb_kthread, rdp,
|
||||
"rcuo%c/%d", rsp->abbr, cpu);
|
||||
BUG_ON(IS_ERR(t));
|
||||
@@ -2843,12 +3022,16 @@ static bool rcu_nohz_full_cpu(struct rcu_state *rsp)
|
||||
*/
|
||||
static void rcu_bind_gp_kthread(void)
|
||||
{
|
||||
#ifdef CONFIG_NO_HZ_FULL
|
||||
int cpu = ACCESS_ONCE(tick_do_timer_cpu);
|
||||
int __maybe_unused cpu;
|
||||
|
||||
if (cpu < 0 || cpu >= nr_cpu_ids)
|
||||
if (!tick_nohz_full_enabled())
|
||||
return;
|
||||
if (raw_smp_processor_id() != cpu)
|
||||
#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
|
||||
cpu = tick_do_timer_cpu;
|
||||
if (cpu >= 0 && cpu < nr_cpu_ids && raw_smp_processor_id() != cpu)
|
||||
set_cpus_allowed_ptr(current, cpumask_of(cpu));
|
||||
#endif /* #ifdef CONFIG_NO_HZ_FULL */
|
||||
#else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
|
||||
if (!is_housekeeping_cpu(raw_smp_processor_id()))
|
||||
housekeeping_affine(current);
|
||||
#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
|
||||
}
|
||||
|
@@ -90,9 +90,6 @@ void __rcu_read_unlock(void)
|
||||
} else {
|
||||
barrier(); /* critical section before exit code. */
|
||||
t->rcu_read_lock_nesting = INT_MIN;
|
||||
#ifdef CONFIG_PROVE_RCU_DELAY
|
||||
udelay(10); /* Make preemption more probable. */
|
||||
#endif /* #ifdef CONFIG_PROVE_RCU_DELAY */
|
||||
barrier(); /* assign before ->rcu_read_unlock_special load */
|
||||
if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
|
||||
rcu_read_unlock_special(t);
|
||||
|
@@ -1263,6 +1263,10 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
|
||||
struct sighand_struct *sighand;
|
||||
|
||||
for (;;) {
|
||||
/*
|
||||
* Disable interrupts early to avoid deadlocks.
|
||||
* See rcu_read_unlock() comment header for details.
|
||||
*/
|
||||
local_irq_save(*flags);
|
||||
rcu_read_lock();
|
||||
sighand = rcu_dereference(tsk->sighand);
|
||||
|
@@ -154,6 +154,7 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
|
||||
|
||||
#ifdef CONFIG_NO_HZ_FULL
|
||||
cpumask_var_t tick_nohz_full_mask;
|
||||
cpumask_var_t housekeeping_mask;
|
||||
bool tick_nohz_full_running;
|
||||
|
||||
static bool can_stop_full_tick(void)
|
||||
@@ -281,6 +282,7 @@ static int __init tick_nohz_full_setup(char *str)
|
||||
int cpu;
|
||||
|
||||
alloc_bootmem_cpumask_var(&tick_nohz_full_mask);
|
||||
alloc_bootmem_cpumask_var(&housekeeping_mask);
|
||||
if (cpulist_parse(str, tick_nohz_full_mask) < 0) {
|
||||
pr_warning("NOHZ: Incorrect nohz_full cpumask\n");
|
||||
return 1;
|
||||
@@ -291,6 +293,8 @@ static int __init tick_nohz_full_setup(char *str)
|
||||
pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu);
|
||||
cpumask_clear_cpu(cpu, tick_nohz_full_mask);
|
||||
}
|
||||
cpumask_andnot(housekeeping_mask,
|
||||
cpu_possible_mask, tick_nohz_full_mask);
|
||||
tick_nohz_full_running = true;
|
||||
|
||||
return 1;
|
||||
@@ -332,9 +336,15 @@ static int tick_nohz_init_all(void)
|
||||
pr_err("NO_HZ: Can't allocate full dynticks cpumask\n");
|
||||
return err;
|
||||
}
|
||||
if (!alloc_cpumask_var(&housekeeping_mask, GFP_KERNEL)) {
|
||||
pr_err("NO_HZ: Can't allocate not-full dynticks cpumask\n");
|
||||
return err;
|
||||
}
|
||||
err = 0;
|
||||
cpumask_setall(tick_nohz_full_mask);
|
||||
cpumask_clear_cpu(smp_processor_id(), tick_nohz_full_mask);
|
||||
cpumask_clear(housekeeping_mask);
|
||||
cpumask_set_cpu(smp_processor_id(), housekeeping_mask);
|
||||
tick_nohz_full_running = true;
|
||||
#endif
|
||||
return err;
|
||||
|
@@ -708,7 +708,7 @@ int _torture_create_kthread(int (*fn)(void *arg), void *arg, char *s, char *m,
|
||||
int ret = 0;
|
||||
|
||||
VERBOSE_TOROUT_STRING(m);
|
||||
*tp = kthread_run(fn, arg, s);
|
||||
*tp = kthread_run(fn, arg, "%s", s);
|
||||
if (IS_ERR(*tp)) {
|
||||
ret = PTR_ERR(*tp);
|
||||
VERBOSE_TOROUT_ERRSTRING(f);
|
||||
|
Reference in New Issue
Block a user