Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RCU updates from Ingo Molnad:
 "The main RCU related changes in this cycle were:

   - Removal of spin_unlock_wait()
   - SRCU updates
   - RCU torture-test updates
   - RCU Documentation updates
   - Extend the sys_membarrier() ABI with the MEMBARRIER_CMD_PRIVATE_EXPEDITED variant
   - Miscellaneous RCU fixes
   - CPU-hotplug fixes"

* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (63 commits)
  arch: Remove spin_unlock_wait() arch-specific definitions
  locking: Remove spin_unlock_wait() generic definitions
  drivers/ata: Replace spin_unlock_wait() with lock/unlock pair
  ipc: Replace spin_unlock_wait() with lock/unlock pair
  exit: Replace spin_unlock_wait() with lock/unlock pair
  completion: Replace spin_unlock_wait() with lock/unlock pair
  doc: Set down RCU's scheduling-clock-interrupt needs
  doc: No longer allowed to use rcu_dereference on non-pointers
  doc: Add RCU files to docbook-generation files
  doc: Update memory-barriers.txt for read-to-write dependencies
  doc: Update RCU documentation
  membarrier: Provide expedited private command
  rcu: Remove exports from rcu_idle_exit() and rcu_idle_enter()
  rcu: Add warning to rcu_idle_enter() for irqs enabled
  rcu: Make rcu_idle_enter() rely on callers disabling irqs
  rcu: Add assertions verifying blocked-tasks list
  rcu/tracing: Set disable_rcu_irq_enter on rcu_eqs_exit()
  rcu: Add TPS() protection for _rcu_barrier_trace strings
  rcu: Use idle versions of swait to make idle-hack clear
  swait: Add idle variants which don't contribute to load average
  ...
This commit is contained in:
Linus Torvalds
2017-09-04 08:13:52 -07:00
85 changed files with 1242 additions and 1328 deletions

View File

@@ -25,3 +25,4 @@ obj-$(CONFIG_SCHED_DEBUG) += debug.o
obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o
obj-$(CONFIG_CPU_FREQ) += cpufreq.o
obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o
obj-$(CONFIG_MEMBARRIER) += membarrier.o

View File

@@ -300,6 +300,8 @@ EXPORT_SYMBOL(try_wait_for_completion);
*/
bool completion_done(struct completion *x)
{
unsigned long flags;
if (!READ_ONCE(x->done))
return false;
@@ -307,14 +309,9 @@ bool completion_done(struct completion *x)
* If ->done, we need to wait for complete() to release ->wait.lock
* otherwise we can end up freeing the completion before complete()
* is done referencing it.
*
* The RMB pairs with complete()'s RELEASE of ->wait.lock and orders
* the loads of ->done and ->wait.lock such that we cannot observe
* the lock before complete() acquires it while observing the ->done
* after it's acquired the lock.
*/
smp_rmb();
spin_unlock_wait(&x->wait.lock);
spin_lock_irqsave(&x->wait.lock, flags);
spin_unlock_irqrestore(&x->wait.lock, flags);
return true;
}
EXPORT_SYMBOL(completion_done);

View File

@@ -951,8 +951,13 @@ struct migration_arg {
static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf,
struct task_struct *p, int dest_cpu)
{
if (unlikely(!cpu_active(dest_cpu)))
return rq;
if (p->flags & PF_KTHREAD) {
if (unlikely(!cpu_online(dest_cpu)))
return rq;
} else {
if (unlikely(!cpu_active(dest_cpu)))
return rq;
}
/* Affinity changed (again). */
if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
@@ -2635,6 +2640,16 @@ static struct rq *finish_task_switch(struct task_struct *prev)
prev_state = prev->state;
vtime_task_switch(prev);
perf_event_task_sched_in(prev, current);
/*
* The membarrier system call requires a full memory barrier
* after storing to rq->curr, before going back to user-space.
*
* TODO: This smp_mb__after_unlock_lock can go away if PPC end
* up adding a full barrier to switch_mm(), or we should figure
* out if a smp_mb__after_unlock_lock is really the proper API
* to use.
*/
smp_mb__after_unlock_lock();
finish_lock_switch(rq, prev);
finish_arch_post_lock_switch();
@@ -3324,6 +3339,21 @@ static void __sched notrace __schedule(bool preempt)
if (likely(prev != next)) {
rq->nr_switches++;
rq->curr = next;
/*
* The membarrier system call requires each architecture
* to have a full memory barrier after updating
* rq->curr, before returning to user-space. For TSO
* (e.g. x86), the architecture must provide its own
* barrier in switch_mm(). For weakly ordered machines
* for which spin_unlock() acts as a full memory
* barrier, finish_lock_switch() in common code takes
* care of this barrier. For weakly ordered machines for
* which spin_unlock() acts as a RELEASE barrier (only
* arm64 and PowerPC), arm64 has a full barrier in
* switch_to(), and PowerPC has
* smp_mb__after_unlock_lock() before
* finish_lock_switch().
*/
++*switch_count;
trace_sched_switch(preempt, prev, next);
@@ -3352,8 +3382,8 @@ void __noreturn do_task_dead(void)
* To avoid it, we have to wait for releasing tsk->pi_lock which
* is held by try_to_wake_up()
*/
smp_mb();
raw_spin_unlock_wait(&current->pi_lock);
raw_spin_lock_irq(&current->pi_lock);
raw_spin_unlock_irq(&current->pi_lock);
/* Causes final put_task_struct in finish_task_switch(): */
__set_current_state(TASK_DEAD);

152
kernel/sched/membarrier.c Normal file
View File

@@ -0,0 +1,152 @@
/*
* Copyright (C) 2010-2017 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
*
* membarrier system call
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include <linux/syscalls.h>
#include <linux/membarrier.h>
#include <linux/tick.h>
#include <linux/cpumask.h>
#include "sched.h" /* for cpu_rq(). */
/*
* Bitmask made from a "or" of all commands within enum membarrier_cmd,
* except MEMBARRIER_CMD_QUERY.
*/
#define MEMBARRIER_CMD_BITMASK \
(MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED)
static void ipi_mb(void *info)
{
smp_mb(); /* IPIs should be serializing but paranoid. */
}
static void membarrier_private_expedited(void)
{
int cpu;
bool fallback = false;
cpumask_var_t tmpmask;
if (num_online_cpus() == 1)
return;
/*
* Matches memory barriers around rq->curr modification in
* scheduler.
*/
smp_mb(); /* system call entry is not a mb. */
/*
* Expedited membarrier commands guarantee that they won't
* block, hence the GFP_NOWAIT allocation flag and fallback
* implementation.
*/
if (!zalloc_cpumask_var(&tmpmask, GFP_NOWAIT)) {
/* Fallback for OOM. */
fallback = true;
}
cpus_read_lock();
for_each_online_cpu(cpu) {
struct task_struct *p;
/*
* Skipping the current CPU is OK even through we can be
* migrated at any point. The current CPU, at the point
* where we read raw_smp_processor_id(), is ensured to
* be in program order with respect to the caller
* thread. Therefore, we can skip this CPU from the
* iteration.
*/
if (cpu == raw_smp_processor_id())
continue;
rcu_read_lock();
p = task_rcu_dereference(&cpu_rq(cpu)->curr);
if (p && p->mm == current->mm) {
if (!fallback)
__cpumask_set_cpu(cpu, tmpmask);
else
smp_call_function_single(cpu, ipi_mb, NULL, 1);
}
rcu_read_unlock();
}
if (!fallback) {
smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
free_cpumask_var(tmpmask);
}
cpus_read_unlock();
/*
* Memory barrier on the caller thread _after_ we finished
* waiting for the last IPI. Matches memory barriers around
* rq->curr modification in scheduler.
*/
smp_mb(); /* exit from system call is not a mb */
}
/**
* sys_membarrier - issue memory barriers on a set of threads
* @cmd: Takes command values defined in enum membarrier_cmd.
* @flags: Currently needs to be 0. For future extensions.
*
* If this system call is not implemented, -ENOSYS is returned. If the
* command specified does not exist, not available on the running
* kernel, or if the command argument is invalid, this system call
* returns -EINVAL. For a given command, with flags argument set to 0,
* this system call is guaranteed to always return the same value until
* reboot.
*
* All memory accesses performed in program order from each targeted thread
* is guaranteed to be ordered with respect to sys_membarrier(). If we use
* the semantic "barrier()" to represent a compiler barrier forcing memory
* accesses to be performed in program order across the barrier, and
* smp_mb() to represent explicit memory barriers forcing full memory
* ordering across the barrier, we have the following ordering table for
* each pair of barrier(), sys_membarrier() and smp_mb():
*
* The pair ordering is detailed as (O: ordered, X: not ordered):
*
* barrier() smp_mb() sys_membarrier()
* barrier() X X O
* smp_mb() X O O
* sys_membarrier() O O O
*/
SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
{
if (unlikely(flags))
return -EINVAL;
switch (cmd) {
case MEMBARRIER_CMD_QUERY:
{
int cmd_mask = MEMBARRIER_CMD_BITMASK;
if (tick_nohz_full_enabled())
cmd_mask &= ~MEMBARRIER_CMD_SHARED;
return cmd_mask;
}
case MEMBARRIER_CMD_SHARED:
/* MEMBARRIER_CMD_SHARED is not compatible with nohz_full. */
if (tick_nohz_full_enabled())
return -EINVAL;
if (num_online_cpus() > 1)
synchronize_sched();
return 0;
case MEMBARRIER_CMD_PRIVATE_EXPEDITED:
membarrier_private_expedited();
return 0;
default:
return -EINVAL;
}
}