Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RCU changes from Ingo Molnar:
 "This is the v3.5 RCU tree from Paul E.  McKenney:

 1) A set of improvements and fixes to the RCU_FAST_NO_HZ feature (with
    more on the way for 3.6).  Posted to LKML:
       https://lkml.org/lkml/2012/4/23/324 (commits 1-3 and 5),
       https://lkml.org/lkml/2012/4/16/611 (commit 4),
       https://lkml.org/lkml/2012/4/30/390 (commit 6), and
       https://lkml.org/lkml/2012/5/4/410 (commit 7, combined with
       the other commits for the convenience of the tester).

 2) Changes to make rcu_barrier() avoid disrupting execution of CPUs
    that have no RCU callbacks.  Posted to LKML:
       https://lkml.org/lkml/2012/4/23/322.

 3) A couple of commits that improve the efficiency of the interaction
    between preemptible RCU and the scheduler, these two being all that
    survived an abortive attempt to allow preemptible RCU's
    __rcu_read_lock() to be inlined.  The full set was posted to LKML at
    https://lkml.org/lkml/2012/4/14/143, and the first and third patches
    of that set remain.

 4) Lai Jiangshan's algorithmic implementation of SRCU, which includes
    call_srcu() and srcu_barrier().  A major feature of this new
    implementation is that synchronize_srcu() no longer disturbs the
    execution of other CPUs.  This work is based on earlier
    implementations by Peter Zijlstra and Paul E.  McKenney.  Posted to
    LKML: https://lkml.org/lkml/2012/2/22/82.

 5) A number of miscellaneous bug fixes and improvements which were
    posted to LKML at: https://lkml.org/lkml/2012/4/23/353 with
    subsequent updates posted to LKML."

* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (32 commits)
  rcu: Make rcu_barrier() less disruptive
  rcu: Explicitly initialize RCU_FAST_NO_HZ per-CPU variables
  rcu: Make RCU_FAST_NO_HZ handle timer migration
  rcu: Update RCU maintainership
  rcu: Make exit_rcu() more precise and consolidate
  rcu: Move PREEMPT_RCU preemption to switch_to() invocation
  rcu: Ensure that RCU_FAST_NO_HZ timers expire on correct CPU
  rcu: Add rcutorture test for call_srcu()
  rcu: Implement per-domain single-threaded call_srcu() state machine
  rcu: Use single value to handle expedited SRCU grace periods
  rcu: Improve srcu_readers_active_idx()'s cache locality
  rcu: Remove unused srcu_barrier()
  rcu: Implement a variant of Peter's SRCU algorithm
  rcu: Improve SRCU's wait_idx() comments
  rcu: Flip ->completed only once per SRCU grace period
  rcu: Increment upper bit only for srcu_read_lock()
  rcu: Remove fast check path from __synchronize_srcu()
  rcu: Direct algorithmic SRCU implementation
  rcu: Introduce rcutorture testing for rcu_barrier()
  timer: Fix mod_timer_pinned() header comment
  ...
This commit is contained in:
Linus Torvalds
2012-05-21 19:26:51 -07:00
23 changed files with 1358 additions and 353 deletions

View File

@@ -30,6 +30,7 @@
* This is only for internal list manipulation where we know
* the prev/next entries already!
*/
#ifndef CONFIG_DEBUG_LIST
static inline void __list_add_rcu(struct list_head *new,
struct list_head *prev, struct list_head *next)
{
@@ -38,6 +39,10 @@ static inline void __list_add_rcu(struct list_head *new,
rcu_assign_pointer(list_next_rcu(prev), new);
next->prev = new;
}
#else
extern void __list_add_rcu(struct list_head *new,
struct list_head *prev, struct list_head *next);
#endif
/**
* list_add_rcu - add a new entry to rcu-protected list
@@ -108,7 +113,7 @@ static inline void list_add_tail_rcu(struct list_head *new,
*/
static inline void list_del_rcu(struct list_head *entry)
{
__list_del(entry->prev, entry->next);
__list_del_entry(entry);
entry->prev = LIST_POISON2;
}
@@ -228,18 +233,43 @@ static inline void list_splice_init_rcu(struct list_head *list,
})
/**
* list_first_entry_rcu - get the first element from a list
* Where are list_empty_rcu() and list_first_entry_rcu()?
*
* Implementing those functions following their counterparts list_empty() and
* list_first_entry() is not advisable because they lead to subtle race
* conditions as the following snippet shows:
*
* if (!list_empty_rcu(mylist)) {
* struct foo *bar = list_first_entry_rcu(mylist, struct foo, list_member);
* do_something(bar);
* }
*
* The list may not be empty when list_empty_rcu checks it, but it may be when
* list_first_entry_rcu rereads the ->next pointer.
*
* Rereading the ->next pointer is not a problem for list_empty() and
* list_first_entry() because they would be protected by a lock that blocks
* writers.
*
* See list_first_or_null_rcu for an alternative.
*/
/**
* list_first_or_null_rcu - get the first element from a list
* @ptr: the list head to take the element from.
* @type: the type of the struct this is embedded in.
* @member: the name of the list_struct within the struct.
*
* Note, that list is expected to be not empty.
* Note that if the list is empty, it returns NULL.
*
* This primitive may safely run concurrently with the _rcu list-mutation
* primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock().
*/
#define list_first_entry_rcu(ptr, type, member) \
list_entry_rcu((ptr)->next, type, member)
#define list_first_or_null_rcu(ptr, type, member) \
({struct list_head *__ptr = (ptr); \
struct list_head __rcu *__next = list_next_rcu(__ptr); \
likely(__ptr != __next) ? container_of(__next, type, member) : NULL; \
})
/**
* list_for_each_entry_rcu - iterate over rcu list of given type

View File

@@ -184,12 +184,14 @@ static inline int rcu_preempt_depth(void)
/* Internal to kernel */
extern void rcu_sched_qs(int cpu);
extern void rcu_bh_qs(int cpu);
extern void rcu_preempt_note_context_switch(void);
extern void rcu_check_callbacks(int cpu, int user);
struct notifier_block;
extern void rcu_idle_enter(void);
extern void rcu_idle_exit(void);
extern void rcu_irq_enter(void);
extern void rcu_irq_exit(void);
extern void exit_rcu(void);
/**
* RCU_NONIDLE - Indicate idle-loop code that needs RCU readers
@@ -922,6 +924,21 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset)
kfree_call_rcu(head, (rcu_callback)offset);
}
/*
* Does the specified offset indicate that the corresponding rcu_head
* structure can be handled by kfree_rcu()?
*/
#define __is_kfree_rcu_offset(offset) ((offset) < 4096)
/*
* Helper macro for kfree_rcu() to prevent argument-expansion eyestrain.
*/
#define __kfree_rcu(head, offset) \
do { \
BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \
call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \
} while (0)
/**
* kfree_rcu() - kfree an object after a grace period.
* @ptr: pointer to kfree
@@ -944,6 +961,9 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset)
*
* Note that the allowable offset might decrease in the future, for example,
* to allow something like kmem_cache_free_rcu().
*
* The BUILD_BUG_ON check must not involve any function calls, hence the
* checks are done in macros here.
*/
#define kfree_rcu(ptr, rcu_head) \
__kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head))

View File

@@ -87,14 +87,6 @@ static inline void kfree_call_rcu(struct rcu_head *head,
#ifdef CONFIG_TINY_RCU
static inline void rcu_preempt_note_context_switch(void)
{
}
static inline void exit_rcu(void)
{
}
static inline int rcu_needs_cpu(int cpu)
{
return 0;
@@ -102,8 +94,6 @@ static inline int rcu_needs_cpu(int cpu)
#else /* #ifdef CONFIG_TINY_RCU */
void rcu_preempt_note_context_switch(void);
extern void exit_rcu(void);
int rcu_preempt_needs_cpu(void);
static inline int rcu_needs_cpu(int cpu)
@@ -116,7 +106,6 @@ static inline int rcu_needs_cpu(int cpu)
static inline void rcu_note_context_switch(int cpu)
{
rcu_sched_qs(cpu);
rcu_preempt_note_context_switch();
}
/*

View File

@@ -45,18 +45,6 @@ static inline void rcu_virt_note_context_switch(int cpu)
rcu_note_context_switch(cpu);
}
#ifdef CONFIG_TREE_PREEMPT_RCU
extern void exit_rcu(void);
#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
static inline void exit_rcu(void)
{
}
#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
extern void synchronize_rcu_bh(void);
extern void synchronize_sched_expedited(void);
extern void synchronize_rcu_expedited(void);
@@ -98,13 +86,6 @@ extern void rcu_force_quiescent_state(void);
extern void rcu_bh_force_quiescent_state(void);
extern void rcu_sched_force_quiescent_state(void);
/* A context switch is a grace period for RCU-sched and RCU-bh. */
static inline int rcu_blocking_is_gp(void)
{
might_sleep(); /* Check for RCU read-side critical section. */
return num_online_cpus() == 1;
}
extern void rcu_scheduler_starting(void);
extern int rcu_scheduler_active __read_mostly;

View File

@@ -1905,12 +1905,22 @@ static inline void rcu_copy_process(struct task_struct *p)
INIT_LIST_HEAD(&p->rcu_node_entry);
}
static inline void rcu_switch_from(struct task_struct *prev)
{
if (prev->rcu_read_lock_nesting != 0)
rcu_preempt_note_context_switch();
}
#else
static inline void rcu_copy_process(struct task_struct *p)
{
}
static inline void rcu_switch_from(struct task_struct *prev)
{
}
#endif
#ifdef CONFIG_SMP

View File

@@ -29,26 +29,35 @@
#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
struct srcu_struct_array {
int c[2];
unsigned long c[2];
unsigned long seq[2];
};
struct rcu_batch {
struct rcu_head *head, **tail;
};
struct srcu_struct {
int completed;
unsigned completed;
struct srcu_struct_array __percpu *per_cpu_ref;
struct mutex mutex;
spinlock_t queue_lock; /* protect ->batch_queue, ->running */
bool running;
/* callbacks just queued */
struct rcu_batch batch_queue;
/* callbacks try to do the first check_zero */
struct rcu_batch batch_check0;
/* callbacks done with the first check_zero and the flip */
struct rcu_batch batch_check1;
struct rcu_batch batch_done;
struct delayed_work work;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map dep_map;
#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
};
#ifndef CONFIG_PREEMPT
#define srcu_barrier() barrier()
#else /* #ifndef CONFIG_PREEMPT */
#define srcu_barrier()
#endif /* #else #ifndef CONFIG_PREEMPT */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
int __init_srcu_struct(struct srcu_struct *sp, const char *name,
@@ -67,12 +76,33 @@ int init_srcu_struct(struct srcu_struct *sp);
#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
/**
* call_srcu() - Queue a callback for invocation after an SRCU grace period
* @sp: srcu_struct in queue the callback
* @head: structure to be used for queueing the SRCU callback.
* @func: function to be invoked after the SRCU grace period
*
* The callback function will be invoked some time after a full SRCU
* grace period elapses, in other words after all pre-existing SRCU
* read-side critical sections have completed. However, the callback
* function might well execute concurrently with other SRCU read-side
* critical sections that started after call_srcu() was invoked. SRCU
* read-side critical sections are delimited by srcu_read_lock() and
* srcu_read_unlock(), and may be nested.
*
* The callback will be invoked from process context, but must nevertheless
* be fast and must not block.
*/
void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
void (*func)(struct rcu_head *head));
void cleanup_srcu_struct(struct srcu_struct *sp);
int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp);
void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp);
void synchronize_srcu(struct srcu_struct *sp);
void synchronize_srcu_expedited(struct srcu_struct *sp);
long srcu_batches_completed(struct srcu_struct *sp);
void srcu_barrier(struct srcu_struct *sp);
#ifdef CONFIG_DEBUG_LOCK_ALLOC