Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RCU updates from Ingo Molnar:
 "The main RCU related changes in this cycle were:

   - Removal of spin_unlock_wait()
   - SRCU updates
   - RCU torture-test updates
   - RCU Documentation updates
   - Extend the sys_membarrier() ABI with the MEMBARRIER_CMD_PRIVATE_EXPEDITED variant
   - Miscellaneous RCU fixes
   - CPU-hotplug fixes"

* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (63 commits)
  arch: Remove spin_unlock_wait() arch-specific definitions
  locking: Remove spin_unlock_wait() generic definitions
  drivers/ata: Replace spin_unlock_wait() with lock/unlock pair
  ipc: Replace spin_unlock_wait() with lock/unlock pair
  exit: Replace spin_unlock_wait() with lock/unlock pair
  completion: Replace spin_unlock_wait() with lock/unlock pair
  doc: Set down RCU's scheduling-clock-interrupt needs
  doc: No longer allowed to use rcu_dereference on non-pointers
  doc: Add RCU files to docbook-generation files
  doc: Update memory-barriers.txt for read-to-write dependencies
  doc: Update RCU documentation
  membarrier: Provide expedited private command
  rcu: Remove exports from rcu_idle_exit() and rcu_idle_enter()
  rcu: Add warning to rcu_idle_enter() for irqs enabled
  rcu: Make rcu_idle_enter() rely on callers disabling irqs
  rcu: Add assertions verifying blocked-tasks list
  rcu/tracing: Set disable_rcu_irq_enter on rcu_eqs_exit()
  rcu: Add TPS() protection for _rcu_barrier_trace strings
  rcu: Use idle versions of swait to make idle-hack clear
  swait: Add idle variants which don't contribute to load average
  ...
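The headline change in this series is the removal of spin_unlock_wait(). Below is a minimal sketch of the replacement pattern applied throughout the hunks that follow; demo_lock and wait_for_holder() are hypothetical names, not from any of these commits. An empty lock/unlock critical section still waits for the current lock holder, but with the well-defined ordering of a real acquire/release pair:

/* Illustrative pattern only; demo_lock and wait_for_holder() are hypothetical. */
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_lock);

static void wait_for_holder(void)
{
        /*
         * Old idiom, removed by this series:
         *      spin_unlock_wait(&demo_lock);
         *
         * New idiom: an empty critical section. The acquire cannot
         * succeed until any current holder releases the lock, so this
         * waits exactly as before, but with the well-defined ordering
         * of a real lock/unlock pair.
         */
        spin_lock(&demo_lock);
        spin_unlock(&demo_lock);
}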
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -25,3 +25,4 @@ obj-$(CONFIG_SCHED_DEBUG) += debug.o
 obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o
 obj-$(CONFIG_CPU_FREQ) += cpufreq.o
 obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o
+obj-$(CONFIG_MEMBARRIER) += membarrier.o
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -300,6 +300,8 @@ EXPORT_SYMBOL(try_wait_for_completion);
  */
 bool completion_done(struct completion *x)
 {
+	unsigned long flags;
+
 	if (!READ_ONCE(x->done))
 		return false;
 
@@ -307,14 +309,9 @@ bool completion_done(struct completion *x)
 	 * If ->done, we need to wait for complete() to release ->wait.lock
 	 * otherwise we can end up freeing the completion before complete()
 	 * is done referencing it.
-	 *
-	 * The RMB pairs with complete()'s RELEASE of ->wait.lock and orders
-	 * the loads of ->done and ->wait.lock such that we cannot observe
-	 * the lock before complete() acquires it while observing the ->done
-	 * after it's acquired the lock.
 	 */
-	smp_rmb();
-	spin_unlock_wait(&x->wait.lock);
+	spin_lock_irqsave(&x->wait.lock, flags);
+	spin_unlock_irqrestore(&x->wait.lock, flags);
 	return true;
 }
 EXPORT_SYMBOL(completion_done);
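The completion_done() hunk above matters because a caller may free the completion object as soon as completion_done() returns true. A hedged sketch of the kind of usage this protects, with hypothetical names (struct my_work, producer, consumer) that are not part of the commit:

/* Hypothetical illustration, not from the commit. */
#include <linux/completion.h>
#include <linux/sched.h>	/* cond_resched() */
#include <linux/slab.h>

struct my_work {			/* hypothetical structure */
        struct completion done;
        /* ... payload ... */
};

/* Producer side, e.g. a worker finishing up: */
static void producer(struct my_work *work)
{
        complete(&work->done);	/* takes, then releases, done.wait.lock */
}

/* Consumer side, polling and then freeing: */
static void consumer(struct my_work *work)
{
        while (!completion_done(&work->done))
                cond_resched();
        /*
         * Safe only because completion_done() waits, via its
         * lock/unlock pair, until complete() has dropped ->wait.lock
         * and no longer touches *work.
         */
        kfree(work);
}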
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -951,8 +951,13 @@ struct migration_arg {
 static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf,
 				 struct task_struct *p, int dest_cpu)
 {
-	if (unlikely(!cpu_active(dest_cpu)))
-		return rq;
+	if (p->flags & PF_KTHREAD) {
+		if (unlikely(!cpu_online(dest_cpu)))
+			return rq;
+	} else {
+		if (unlikely(!cpu_active(dest_cpu)))
+			return rq;
+	}
 
 	/* Affinity changed (again). */
 	if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
@@ -2635,6 +2640,16 @@ static struct rq *finish_task_switch(struct task_struct *prev)
 	prev_state = prev->state;
 	vtime_task_switch(prev);
 	perf_event_task_sched_in(prev, current);
+	/*
+	 * The membarrier system call requires a full memory barrier
+	 * after storing to rq->curr, before going back to user-space.
+	 *
+	 * TODO: This smp_mb__after_unlock_lock can go away if PPC end
+	 * up adding a full barrier to switch_mm(), or we should figure
+	 * out if a smp_mb__after_unlock_lock is really the proper API
+	 * to use.
+	 */
+	smp_mb__after_unlock_lock();
 	finish_lock_switch(rq, prev);
 	finish_arch_post_lock_switch();
 
@@ -3324,6 +3339,21 @@ static void __sched notrace __schedule(bool preempt)
 	if (likely(prev != next)) {
 		rq->nr_switches++;
 		rq->curr = next;
+		/*
+		 * The membarrier system call requires each architecture
+		 * to have a full memory barrier after updating
+		 * rq->curr, before returning to user-space. For TSO
+		 * (e.g. x86), the architecture must provide its own
+		 * barrier in switch_mm(). For weakly ordered machines
+		 * for which spin_unlock() acts as a full memory
+		 * barrier, finish_lock_switch() in common code takes
+		 * care of this barrier. For weakly ordered machines for
+		 * which spin_unlock() acts as a RELEASE barrier (only
+		 * arm64 and PowerPC), arm64 has a full barrier in
+		 * switch_to(), and PowerPC has
+		 * smp_mb__after_unlock_lock() before
+		 * finish_lock_switch().
+		 */
 		++*switch_count;
 
 		trace_sched_switch(preempt, prev, next);
@@ -3352,8 +3382,8 @@ void __noreturn do_task_dead(void)
 	 * To avoid it, we have to wait for releasing tsk->pi_lock which
 	 * is held by try_to_wake_up()
 	 */
-	smp_mb();
-	raw_spin_unlock_wait(&current->pi_lock);
+	raw_spin_lock_irq(&current->pi_lock);
+	raw_spin_unlock_irq(&current->pi_lock);
 
 	/* Causes final put_task_struct in finish_task_switch(): */
 	__set_current_state(TASK_DEAD);
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
new file mode 100644
--- /dev/null
+++ b/kernel/sched/membarrier.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (C) 2010-2017 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * membarrier system call
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/syscalls.h>
+#include <linux/membarrier.h>
+#include <linux/tick.h>
+#include <linux/cpumask.h>
+
+#include "sched.h"	/* for cpu_rq(). */
+
+/*
+ * Bitmask made from a "or" of all commands within enum membarrier_cmd,
+ * except MEMBARRIER_CMD_QUERY.
+ */
+#define MEMBARRIER_CMD_BITMASK	\
+	(MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED)
+
+static void ipi_mb(void *info)
+{
+	smp_mb();	/* IPIs should be serializing but paranoid. */
+}
+
+static void membarrier_private_expedited(void)
+{
+	int cpu;
+	bool fallback = false;
+	cpumask_var_t tmpmask;
+
+	if (num_online_cpus() == 1)
+		return;
+
+	/*
+	 * Matches memory barriers around rq->curr modification in
+	 * scheduler.
+	 */
+	smp_mb();	/* system call entry is not a mb. */
+
+	/*
+	 * Expedited membarrier commands guarantee that they won't
+	 * block, hence the GFP_NOWAIT allocation flag and fallback
+	 * implementation.
+	 */
+	if (!zalloc_cpumask_var(&tmpmask, GFP_NOWAIT)) {
+		/* Fallback for OOM. */
+		fallback = true;
+	}
+
+	cpus_read_lock();
+	for_each_online_cpu(cpu) {
+		struct task_struct *p;
+
+		/*
+		 * Skipping the current CPU is OK even through we can be
+		 * migrated at any point. The current CPU, at the point
+		 * where we read raw_smp_processor_id(), is ensured to
+		 * be in program order with respect to the caller
+		 * thread. Therefore, we can skip this CPU from the
+		 * iteration.
+		 */
+		if (cpu == raw_smp_processor_id())
+			continue;
+		rcu_read_lock();
+		p = task_rcu_dereference(&cpu_rq(cpu)->curr);
+		if (p && p->mm == current->mm) {
+			if (!fallback)
+				__cpumask_set_cpu(cpu, tmpmask);
+			else
+				smp_call_function_single(cpu, ipi_mb, NULL, 1);
+		}
+		rcu_read_unlock();
+	}
+	if (!fallback) {
+		smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
+		free_cpumask_var(tmpmask);
+	}
+	cpus_read_unlock();
+
+	/*
+	 * Memory barrier on the caller thread _after_ we finished
+	 * waiting for the last IPI. Matches memory barriers around
+	 * rq->curr modification in scheduler.
+	 */
+	smp_mb();	/* exit from system call is not a mb */
+}
+
+/**
+ * sys_membarrier - issue memory barriers on a set of threads
+ * @cmd:   Takes command values defined in enum membarrier_cmd.
+ * @flags: Currently needs to be 0. For future extensions.
+ *
+ * If this system call is not implemented, -ENOSYS is returned. If the
+ * command specified does not exist, not available on the running
+ * kernel, or if the command argument is invalid, this system call
+ * returns -EINVAL. For a given command, with flags argument set to 0,
+ * this system call is guaranteed to always return the same value until
+ * reboot.
+ *
+ * All memory accesses performed in program order from each targeted thread
+ * is guaranteed to be ordered with respect to sys_membarrier(). If we use
+ * the semantic "barrier()" to represent a compiler barrier forcing memory
+ * accesses to be performed in program order across the barrier, and
+ * smp_mb() to represent explicit memory barriers forcing full memory
+ * ordering across the barrier, we have the following ordering table for
+ * each pair of barrier(), sys_membarrier() and smp_mb():
+ *
+ * The pair ordering is detailed as (O: ordered, X: not ordered):
+ *
+ *                        barrier()   smp_mb() sys_membarrier()
+ *        barrier()          X           X            O
+ *        smp_mb()           X           O            O
+ *        sys_membarrier()   O           O            O
+ */
+SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
+{
+	if (unlikely(flags))
+		return -EINVAL;
+	switch (cmd) {
+	case MEMBARRIER_CMD_QUERY:
+	{
+		int cmd_mask = MEMBARRIER_CMD_BITMASK;
+
+		if (tick_nohz_full_enabled())
+			cmd_mask &= ~MEMBARRIER_CMD_SHARED;
+		return cmd_mask;
+	}
+	case MEMBARRIER_CMD_SHARED:
+		/* MEMBARRIER_CMD_SHARED is not compatible with nohz_full. */
+		if (tick_nohz_full_enabled())
+			return -EINVAL;
+		if (num_online_cpus() > 1)
+			synchronize_sched();
+		return 0;
+	case MEMBARRIER_CMD_PRIVATE_EXPEDITED:
+		membarrier_private_expedited();
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
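For context on the new ABI, here is a hedged user-space sketch of MEMBARRIER_CMD_PRIVATE_EXPEDITED as defined by this commit; the raw syscall(2) interface is used since a libc wrapper is assumed unavailable, and error handling is minimal. A successful call acts as a full memory barrier (smp_mb()) on every other running thread of the calling process:

/* Hypothetical user-space example, not part of the commit. */
#include <linux/membarrier.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdio.h>

static int membarrier(int cmd, int flags)
{
        return syscall(__NR_membarrier, cmd, flags);
}

int main(void)
{
        int mask = membarrier(MEMBARRIER_CMD_QUERY, 0);

        if (mask < 0 || !(mask & MEMBARRIER_CMD_PRIVATE_EXPEDITED)) {
                fprintf(stderr, "private expedited membarrier unsupported\n");
                return 1;
        }
        /* Full barrier on all other running threads of this process. */
        if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0)) {
                perror("membarrier");
                return 1;
        }
        return 0;
}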