Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler changes from Ingo Molnar: "The main changes in this cycle are: - (much) improved CONFIG_NUMA_BALANCING support from Mel Gorman, Rik van Riel, Peter Zijlstra et al. Yay! - optimize preemption counter handling: merge the NEED_RESCHED flag into the preempt_count variable, by Peter Zijlstra. - wait.h fixes and code reorganization from Peter Zijlstra - cfs_bandwidth fixes from Ben Segall - SMP load-balancer cleanups from Peter Zijlstra - idle balancer improvements from Jason Low - other fixes and cleanups" * 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (129 commits) ftrace, sched: Add TRACE_FLAG_PREEMPT_RESCHED stop_machine: Fix race between stop_two_cpus() and stop_cpus() sched: Remove unnecessary iteration over sched domains to update nr_busy_cpus sched: Fix asymmetric scheduling for POWER7 sched: Move completion code from core.c to completion.c sched: Move wait code from core.c to wait.c sched: Move wait.c into kernel/sched/ sched/wait: Fix __wait_event_interruptible_lock_irq_timeout() sched: Avoid throttle_cfs_rq() racing with period_timer stopping sched: Guarantee new group-entities always have weight sched: Fix hrtimer_cancel()/rq->lock deadlock sched: Fix cfs_bandwidth misuse of hrtimer_expires_remaining sched: Fix race on toggling cfs_bandwidth_used sched: Remove extra put_online_cpus() inside sched_setaffinity() sched/rt: Fix task_tick_rt() comment sched/wait: Fix build breakage sched/wait: Introduce prepare_to_wait_event() sched/wait: Add ___wait_cond_timeout() to wait_event*_timeout() too sched: Remove get_online_cpus() usage sched: Fix race in migrate_swap_stop() ...
This commit is contained in:
@@ -7,7 +7,7 @@ obj-y = fork.o exec_domain.o panic.o \
|
||||
sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \
|
||||
signal.o sys.o kmod.o workqueue.o pid.o task_work.o \
|
||||
extable.o params.o posix-timers.o \
|
||||
kthread.o wait.o sys_ni.o posix-cpu-timers.o mutex.o \
|
||||
kthread.o sys_ni.o posix-cpu-timers.o mutex.o \
|
||||
hrtimer.o rwsem.o nsproxy.o semaphore.o \
|
||||
notifier.o ksysfs.o cred.o reboot.o \
|
||||
async.o range.o groups.o lglock.o smpboot.o
|
||||
|
@@ -10,6 +10,7 @@
|
||||
#include <linux/mmzone.h>
|
||||
#include <linux/kbuild.h>
|
||||
#include <linux/page_cgroup.h>
|
||||
#include <linux/log2.h>
|
||||
|
||||
void foo(void)
|
||||
{
|
||||
@@ -17,5 +18,8 @@ void foo(void)
|
||||
DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
|
||||
DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES);
|
||||
DEFINE(NR_PCG_FLAGS, __NR_PCG_FLAGS);
|
||||
#ifdef CONFIG_SMP
|
||||
DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS));
|
||||
#endif
|
||||
/* End of constants */
|
||||
}
|
||||
|
@@ -120,7 +120,7 @@ void context_tracking_user_enter(void)
|
||||
* instead of preempt_schedule() to exit user context if needed before
|
||||
* calling the scheduler.
|
||||
*/
|
||||
void __sched notrace preempt_schedule_context(void)
|
||||
asmlinkage void __sched notrace preempt_schedule_context(void)
|
||||
{
|
||||
enum ctx_state prev_ctx;
|
||||
|
||||
|
17
kernel/cpu.c
17
kernel/cpu.c
@@ -308,6 +308,23 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
|
||||
}
|
||||
smpboot_park_threads(cpu);
|
||||
|
||||
/*
|
||||
* By now we've cleared cpu_active_mask, wait for all preempt-disabled
|
||||
* and RCU users of this state to go away such that all new such users
|
||||
* will observe it.
|
||||
*
|
||||
* For CONFIG_PREEMPT we have preemptible RCU and its sync_rcu() might
|
||||
* not imply sync_sched(), so explicitly call both.
|
||||
*/
|
||||
#ifdef CONFIG_PREEMPT
|
||||
synchronize_sched();
|
||||
#endif
|
||||
synchronize_rcu();
|
||||
|
||||
/*
|
||||
* So now all preempt/rcu users must observe !cpu_active().
|
||||
*/
|
||||
|
||||
err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
|
||||
if (err) {
|
||||
/* CPU didn't die: tell everyone. Can't complain. */
|
||||
|
@@ -44,7 +44,7 @@ static inline int cpu_idle_poll(void)
|
||||
rcu_idle_enter();
|
||||
trace_cpu_idle_rcuidle(0, smp_processor_id());
|
||||
local_irq_enable();
|
||||
while (!need_resched())
|
||||
while (!tif_need_resched())
|
||||
cpu_relax();
|
||||
trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
|
||||
rcu_idle_exit();
|
||||
@@ -92,8 +92,7 @@ static void cpu_idle_loop(void)
|
||||
if (cpu_idle_force_poll || tick_check_broadcast_expired()) {
|
||||
cpu_idle_poll();
|
||||
} else {
|
||||
current_clr_polling();
|
||||
if (!need_resched()) {
|
||||
if (!current_clr_polling_and_test()) {
|
||||
stop_critical_timings();
|
||||
rcu_idle_enter();
|
||||
arch_cpu_idle();
|
||||
@@ -103,9 +102,16 @@ static void cpu_idle_loop(void)
|
||||
} else {
|
||||
local_irq_enable();
|
||||
}
|
||||
current_set_polling();
|
||||
__current_set_polling();
|
||||
}
|
||||
arch_cpu_idle_exit();
|
||||
/*
|
||||
* We need to test and propagate the TIF_NEED_RESCHED
|
||||
* bit here because we might not have sent the
|
||||
* reschedule IPI to idle tasks.
|
||||
*/
|
||||
if (tif_need_resched())
|
||||
set_preempt_need_resched();
|
||||
}
|
||||
tick_nohz_idle_exit();
|
||||
schedule_preempt_disabled();
|
||||
@@ -129,7 +135,7 @@ void cpu_startup_entry(enum cpuhp_state state)
|
||||
*/
|
||||
boot_init_stack_canary();
|
||||
#endif
|
||||
current_set_polling();
|
||||
__current_set_polling();
|
||||
arch_cpu_idle_prepare();
|
||||
cpu_idle_loop();
|
||||
}
|
||||
|
@@ -816,9 +816,6 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
|
||||
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
mm->pmd_huge_pte = NULL;
|
||||
#endif
|
||||
#ifdef CONFIG_NUMA_BALANCING
|
||||
mm->first_nid = NUMA_PTE_SCAN_INIT;
|
||||
#endif
|
||||
if (!mm_init(mm, tsk))
|
||||
goto fail_nomem;
|
||||
@@ -1313,7 +1310,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
|
||||
#endif
|
||||
|
||||
/* Perform scheduler related setup. Assign this task to a CPU. */
|
||||
sched_fork(p);
|
||||
sched_fork(clone_flags, p);
|
||||
|
||||
retval = perf_event_init_task(p);
|
||||
if (retval)
|
||||
|
@@ -916,6 +916,12 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
|
||||
force_quiescent_state(rsp); /* Kick them all. */
|
||||
}
|
||||
|
||||
/*
|
||||
* This function really isn't for public consumption, but RCU is special in
|
||||
* that context switches can allow the state machine to make progress.
|
||||
*/
|
||||
extern void resched_cpu(int cpu);
|
||||
|
||||
static void print_cpu_stall(struct rcu_state *rsp)
|
||||
{
|
||||
int cpu;
|
||||
@@ -945,7 +951,14 @@ static void print_cpu_stall(struct rcu_state *rsp)
|
||||
3 * rcu_jiffies_till_stall_check() + 3;
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
|
||||
set_need_resched(); /* kick ourselves to get things going. */
|
||||
/*
|
||||
* Attempt to revive the RCU machinery by forcing a context switch.
|
||||
*
|
||||
* A context switch would normally allow the RCU state machine to make
|
||||
* progress and it could be we're stuck in kernel space without context
|
||||
* switches for an entirely unreasonable amount of time.
|
||||
*/
|
||||
resched_cpu(smp_processor_id());
|
||||
}
|
||||
|
||||
static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
|
||||
|
@@ -12,6 +12,7 @@ CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer
|
||||
endif
|
||||
|
||||
obj-y += core.o proc.o clock.o cputime.o idle_task.o fair.o rt.o stop_task.o
|
||||
obj-y += wait.o completion.o
|
||||
obj-$(CONFIG_SMP) += cpupri.o
|
||||
obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
|
||||
obj-$(CONFIG_SCHEDSTATS) += stats.o
|
||||
|
299
kernel/sched/completion.c
Normal file
299
kernel/sched/completion.c
Normal file
@@ -0,0 +1,299 @@
|
||||
/*
|
||||
* Generic wait-for-completion handler;
|
||||
*
|
||||
* It differs from semaphores in that their default case is the opposite,
|
||||
* wait_for_completion default blocks whereas semaphore default non-block. The
|
||||
* interface also makes it easy to 'complete' multiple waiting threads,
|
||||
* something which isn't entirely natural for semaphores.
|
||||
*
|
||||
* But more importantly, the primitive documents the usage. Semaphores would
|
||||
* typically be used for exclusion which gives rise to priority inversion.
|
||||
* Waiting for completion is typically a sync point, but not an exclusion point.
|
||||
*/
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include <linux/completion.h>
|
||||
|
||||
/**
|
||||
* complete: - signals a single thread waiting on this completion
|
||||
* @x: holds the state of this particular completion
|
||||
*
|
||||
* This will wake up a single thread waiting on this completion. Threads will be
|
||||
* awakened in the same order in which they were queued.
|
||||
*
|
||||
* See also complete_all(), wait_for_completion() and related routines.
|
||||
*
|
||||
* It may be assumed that this function implies a write memory barrier before
|
||||
* changing the task state if and only if any tasks are woken up.
|
||||
*/
|
||||
void complete(struct completion *x)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&x->wait.lock, flags);
|
||||
x->done++;
|
||||
__wake_up_locked(&x->wait, TASK_NORMAL, 1);
|
||||
spin_unlock_irqrestore(&x->wait.lock, flags);
|
||||
}
|
||||
EXPORT_SYMBOL(complete);
|
||||
|
||||
/**
|
||||
* complete_all: - signals all threads waiting on this completion
|
||||
* @x: holds the state of this particular completion
|
||||
*
|
||||
* This will wake up all threads waiting on this particular completion event.
|
||||
*
|
||||
* It may be assumed that this function implies a write memory barrier before
|
||||
* changing the task state if and only if any tasks are woken up.
|
||||
*/
|
||||
void complete_all(struct completion *x)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&x->wait.lock, flags);
|
||||
x->done += UINT_MAX/2;
|
||||
__wake_up_locked(&x->wait, TASK_NORMAL, 0);
|
||||
spin_unlock_irqrestore(&x->wait.lock, flags);
|
||||
}
|
||||
EXPORT_SYMBOL(complete_all);
|
||||
|
||||
static inline long __sched
|
||||
do_wait_for_common(struct completion *x,
|
||||
long (*action)(long), long timeout, int state)
|
||||
{
|
||||
if (!x->done) {
|
||||
DECLARE_WAITQUEUE(wait, current);
|
||||
|
||||
__add_wait_queue_tail_exclusive(&x->wait, &wait);
|
||||
do {
|
||||
if (signal_pending_state(state, current)) {
|
||||
timeout = -ERESTARTSYS;
|
||||
break;
|
||||
}
|
||||
__set_current_state(state);
|
||||
spin_unlock_irq(&x->wait.lock);
|
||||
timeout = action(timeout);
|
||||
spin_lock_irq(&x->wait.lock);
|
||||
} while (!x->done && timeout);
|
||||
__remove_wait_queue(&x->wait, &wait);
|
||||
if (!x->done)
|
||||
return timeout;
|
||||
}
|
||||
x->done--;
|
||||
return timeout ?: 1;
|
||||
}
|
||||
|
||||
static inline long __sched
|
||||
__wait_for_common(struct completion *x,
|
||||
long (*action)(long), long timeout, int state)
|
||||
{
|
||||
might_sleep();
|
||||
|
||||
spin_lock_irq(&x->wait.lock);
|
||||
timeout = do_wait_for_common(x, action, timeout, state);
|
||||
spin_unlock_irq(&x->wait.lock);
|
||||
return timeout;
|
||||
}
|
||||
|
||||
static long __sched
|
||||
wait_for_common(struct completion *x, long timeout, int state)
|
||||
{
|
||||
return __wait_for_common(x, schedule_timeout, timeout, state);
|
||||
}
|
||||
|
||||
static long __sched
|
||||
wait_for_common_io(struct completion *x, long timeout, int state)
|
||||
{
|
||||
return __wait_for_common(x, io_schedule_timeout, timeout, state);
|
||||
}
|
||||
|
||||
/**
|
||||
* wait_for_completion: - waits for completion of a task
|
||||
* @x: holds the state of this particular completion
|
||||
*
|
||||
* This waits to be signaled for completion of a specific task. It is NOT
|
||||
* interruptible and there is no timeout.
|
||||
*
|
||||
* See also similar routines (i.e. wait_for_completion_timeout()) with timeout
|
||||
* and interrupt capability. Also see complete().
|
||||
*/
|
||||
void __sched wait_for_completion(struct completion *x)
|
||||
{
|
||||
wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
EXPORT_SYMBOL(wait_for_completion);
|
||||
|
||||
/**
|
||||
* wait_for_completion_timeout: - waits for completion of a task (w/timeout)
|
||||
* @x: holds the state of this particular completion
|
||||
* @timeout: timeout value in jiffies
|
||||
*
|
||||
* This waits for either a completion of a specific task to be signaled or for a
|
||||
* specified timeout to expire. The timeout is in jiffies. It is not
|
||||
* interruptible.
|
||||
*
|
||||
* Return: 0 if timed out, and positive (at least 1, or number of jiffies left
|
||||
* till timeout) if completed.
|
||||
*/
|
||||
unsigned long __sched
|
||||
wait_for_completion_timeout(struct completion *x, unsigned long timeout)
|
||||
{
|
||||
return wait_for_common(x, timeout, TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
EXPORT_SYMBOL(wait_for_completion_timeout);
|
||||
|
||||
/**
|
||||
* wait_for_completion_io: - waits for completion of a task
|
||||
* @x: holds the state of this particular completion
|
||||
*
|
||||
* This waits to be signaled for completion of a specific task. It is NOT
|
||||
* interruptible and there is no timeout. The caller is accounted as waiting
|
||||
* for IO.
|
||||
*/
|
||||
void __sched wait_for_completion_io(struct completion *x)
|
||||
{
|
||||
wait_for_common_io(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
EXPORT_SYMBOL(wait_for_completion_io);
|
||||
|
||||
/**
|
||||
* wait_for_completion_io_timeout: - waits for completion of a task (w/timeout)
|
||||
* @x: holds the state of this particular completion
|
||||
* @timeout: timeout value in jiffies
|
||||
*
|
||||
* This waits for either a completion of a specific task to be signaled or for a
|
||||
* specified timeout to expire. The timeout is in jiffies. It is not
|
||||
* interruptible. The caller is accounted as waiting for IO.
|
||||
*
|
||||
* Return: 0 if timed out, and positive (at least 1, or number of jiffies left
|
||||
* till timeout) if completed.
|
||||
*/
|
||||
unsigned long __sched
|
||||
wait_for_completion_io_timeout(struct completion *x, unsigned long timeout)
|
||||
{
|
||||
return wait_for_common_io(x, timeout, TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
EXPORT_SYMBOL(wait_for_completion_io_timeout);
|
||||
|
||||
/**
|
||||
* wait_for_completion_interruptible: - waits for completion of a task (w/intr)
|
||||
* @x: holds the state of this particular completion
|
||||
*
|
||||
* This waits for completion of a specific task to be signaled. It is
|
||||
* interruptible.
|
||||
*
|
||||
* Return: -ERESTARTSYS if interrupted, 0 if completed.
|
||||
*/
|
||||
int __sched wait_for_completion_interruptible(struct completion *x)
|
||||
{
|
||||
long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE);
|
||||
if (t == -ERESTARTSYS)
|
||||
return t;
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(wait_for_completion_interruptible);
|
||||
|
||||
/**
|
||||
* wait_for_completion_interruptible_timeout: - waits for completion (w/(to,intr))
|
||||
* @x: holds the state of this particular completion
|
||||
* @timeout: timeout value in jiffies
|
||||
*
|
||||
* This waits for either a completion of a specific task to be signaled or for a
|
||||
* specified timeout to expire. It is interruptible. The timeout is in jiffies.
|
||||
*
|
||||
* Return: -ERESTARTSYS if interrupted, 0 if timed out, positive (at least 1,
|
||||
* or number of jiffies left till timeout) if completed.
|
||||
*/
|
||||
long __sched
|
||||
wait_for_completion_interruptible_timeout(struct completion *x,
|
||||
unsigned long timeout)
|
||||
{
|
||||
return wait_for_common(x, timeout, TASK_INTERRUPTIBLE);
|
||||
}
|
||||
EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
|
||||
|
||||
/**
|
||||
* wait_for_completion_killable: - waits for completion of a task (killable)
|
||||
* @x: holds the state of this particular completion
|
||||
*
|
||||
* This waits to be signaled for completion of a specific task. It can be
|
||||
* interrupted by a kill signal.
|
||||
*
|
||||
* Return: -ERESTARTSYS if interrupted, 0 if completed.
|
||||
*/
|
||||
int __sched wait_for_completion_killable(struct completion *x)
|
||||
{
|
||||
long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE);
|
||||
if (t == -ERESTARTSYS)
|
||||
return t;
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(wait_for_completion_killable);
|
||||
|
||||
/**
|
||||
* wait_for_completion_killable_timeout: - waits for completion of a task (w/(to,killable))
|
||||
* @x: holds the state of this particular completion
|
||||
* @timeout: timeout value in jiffies
|
||||
*
|
||||
* This waits for either a completion of a specific task to be
|
||||
* signaled or for a specified timeout to expire. It can be
|
||||
* interrupted by a kill signal. The timeout is in jiffies.
|
||||
*
|
||||
* Return: -ERESTARTSYS if interrupted, 0 if timed out, positive (at least 1,
|
||||
* or number of jiffies left till timeout) if completed.
|
||||
*/
|
||||
long __sched
|
||||
wait_for_completion_killable_timeout(struct completion *x,
|
||||
unsigned long timeout)
|
||||
{
|
||||
return wait_for_common(x, timeout, TASK_KILLABLE);
|
||||
}
|
||||
EXPORT_SYMBOL(wait_for_completion_killable_timeout);
|
||||
|
||||
/**
|
||||
* try_wait_for_completion - try to decrement a completion without blocking
|
||||
* @x: completion structure
|
||||
*
|
||||
* Return: 0 if a decrement cannot be done without blocking
|
||||
* 1 if a decrement succeeded.
|
||||
*
|
||||
* If a completion is being used as a counting completion,
|
||||
* attempt to decrement the counter without blocking. This
|
||||
* enables us to avoid waiting if the resource the completion
|
||||
* is protecting is not available.
|
||||
*/
|
||||
bool try_wait_for_completion(struct completion *x)
|
||||
{
|
||||
unsigned long flags;
|
||||
int ret = 1;
|
||||
|
||||
spin_lock_irqsave(&x->wait.lock, flags);
|
||||
if (!x->done)
|
||||
ret = 0;
|
||||
else
|
||||
x->done--;
|
||||
spin_unlock_irqrestore(&x->wait.lock, flags);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(try_wait_for_completion);
|
||||
|
||||
/**
|
||||
* completion_done - Test to see if a completion has any waiters
|
||||
* @x: completion structure
|
||||
*
|
||||
* Return: 0 if there are waiters (wait_for_completion() in progress)
|
||||
* 1 if there are no waiters.
|
||||
*
|
||||
*/
|
||||
bool completion_done(struct completion *x)
|
||||
{
|
||||
unsigned long flags;
|
||||
int ret = 1;
|
||||
|
||||
spin_lock_irqsave(&x->wait.lock, flags);
|
||||
if (!x->done)
|
||||
ret = 0;
|
||||
spin_unlock_irqrestore(&x->wait.lock, flags);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(completion_done);
|
@@ -513,12 +513,11 @@ static inline void init_hrtick(void)
|
||||
* might also involve a cross-CPU call to trigger the scheduler on
|
||||
* the target CPU.
|
||||
*/
|
||||
#ifdef CONFIG_SMP
|
||||
void resched_task(struct task_struct *p)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
assert_raw_spin_locked(&task_rq(p)->lock);
|
||||
lockdep_assert_held(&task_rq(p)->lock);
|
||||
|
||||
if (test_tsk_need_resched(p))
|
||||
return;
|
||||
@@ -526,8 +525,10 @@ void resched_task(struct task_struct *p)
|
||||
set_tsk_need_resched(p);
|
||||
|
||||
cpu = task_cpu(p);
|
||||
if (cpu == smp_processor_id())
|
||||
if (cpu == smp_processor_id()) {
|
||||
set_preempt_need_resched();
|
||||
return;
|
||||
}
|
||||
|
||||
/* NEED_RESCHED must be visible before we test polling */
|
||||
smp_mb();
|
||||
@@ -546,6 +547,7 @@ void resched_cpu(int cpu)
|
||||
raw_spin_unlock_irqrestore(&rq->lock, flags);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
#ifdef CONFIG_NO_HZ_COMMON
|
||||
/*
|
||||
* In the semi idle case, use the nearest busy cpu for migrating timers
|
||||
@@ -693,12 +695,6 @@ void sched_avg_update(struct rq *rq)
|
||||
}
|
||||
}
|
||||
|
||||
#else /* !CONFIG_SMP */
|
||||
void resched_task(struct task_struct *p)
|
||||
{
|
||||
assert_raw_spin_locked(&task_rq(p)->lock);
|
||||
set_tsk_need_resched(p);
|
||||
}
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
#if defined(CONFIG_RT_GROUP_SCHED) || (defined(CONFIG_FAIR_GROUP_SCHED) && \
|
||||
@@ -767,14 +763,14 @@ static void set_load_weight(struct task_struct *p)
|
||||
static void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
|
||||
{
|
||||
update_rq_clock(rq);
|
||||
sched_info_queued(p);
|
||||
sched_info_queued(rq, p);
|
||||
p->sched_class->enqueue_task(rq, p, flags);
|
||||
}
|
||||
|
||||
static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
|
||||
{
|
||||
update_rq_clock(rq);
|
||||
sched_info_dequeued(p);
|
||||
sched_info_dequeued(rq, p);
|
||||
p->sched_class->dequeue_task(rq, p, flags);
|
||||
}
|
||||
|
||||
@@ -987,7 +983,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
|
||||
* ttwu() will sort out the placement.
|
||||
*/
|
||||
WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING &&
|
||||
!(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE));
|
||||
!(task_preempt_count(p) & PREEMPT_ACTIVE));
|
||||
|
||||
#ifdef CONFIG_LOCKDEP
|
||||
/*
|
||||
@@ -1017,6 +1013,107 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
|
||||
__set_task_cpu(p, new_cpu);
|
||||
}
|
||||
|
||||
static void __migrate_swap_task(struct task_struct *p, int cpu)
|
||||
{
|
||||
if (p->on_rq) {
|
||||
struct rq *src_rq, *dst_rq;
|
||||
|
||||
src_rq = task_rq(p);
|
||||
dst_rq = cpu_rq(cpu);
|
||||
|
||||
deactivate_task(src_rq, p, 0);
|
||||
set_task_cpu(p, cpu);
|
||||
activate_task(dst_rq, p, 0);
|
||||
check_preempt_curr(dst_rq, p, 0);
|
||||
} else {
|
||||
/*
|
||||
* Task isn't running anymore; make it appear like we migrated
|
||||
* it before it went to sleep. This means on wakeup we make the
|
||||
* previous cpu our targer instead of where it really is.
|
||||
*/
|
||||
p->wake_cpu = cpu;
|
||||
}
|
||||
}
|
||||
|
||||
/* Argument bundle passed from migrate_swap() to migrate_swap_stop(). */
struct migration_swap_arg {
	struct task_struct *src_task;	/* first task of the pair */
	struct task_struct *dst_task;	/* second task of the pair */
	int src_cpu;			/* CPU recorded for src_task */
	int dst_cpu;			/* CPU recorded for dst_task */
};
|
||||
|
||||
static int migrate_swap_stop(void *data)
|
||||
{
|
||||
struct migration_swap_arg *arg = data;
|
||||
struct rq *src_rq, *dst_rq;
|
||||
int ret = -EAGAIN;
|
||||
|
||||
src_rq = cpu_rq(arg->src_cpu);
|
||||
dst_rq = cpu_rq(arg->dst_cpu);
|
||||
|
||||
double_raw_lock(&arg->src_task->pi_lock,
|
||||
&arg->dst_task->pi_lock);
|
||||
double_rq_lock(src_rq, dst_rq);
|
||||
if (task_cpu(arg->dst_task) != arg->dst_cpu)
|
||||
goto unlock;
|
||||
|
||||
if (task_cpu(arg->src_task) != arg->src_cpu)
|
||||
goto unlock;
|
||||
|
||||
if (!cpumask_test_cpu(arg->dst_cpu, tsk_cpus_allowed(arg->src_task)))
|
||||
goto unlock;
|
||||
|
||||
if (!cpumask_test_cpu(arg->src_cpu, tsk_cpus_allowed(arg->dst_task)))
|
||||
goto unlock;
|
||||
|
||||
__migrate_swap_task(arg->src_task, arg->dst_cpu);
|
||||
__migrate_swap_task(arg->dst_task, arg->src_cpu);
|
||||
|
||||
ret = 0;
|
||||
|
||||
unlock:
|
||||
double_rq_unlock(src_rq, dst_rq);
|
||||
raw_spin_unlock(&arg->dst_task->pi_lock);
|
||||
raw_spin_unlock(&arg->src_task->pi_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Cross migrate two tasks
|
||||
*/
|
||||
int migrate_swap(struct task_struct *cur, struct task_struct *p)
|
||||
{
|
||||
struct migration_swap_arg arg;
|
||||
int ret = -EINVAL;
|
||||
|
||||
arg = (struct migration_swap_arg){
|
||||
.src_task = cur,
|
||||
.src_cpu = task_cpu(cur),
|
||||
.dst_task = p,
|
||||
.dst_cpu = task_cpu(p),
|
||||
};
|
||||
|
||||
if (arg.src_cpu == arg.dst_cpu)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* These three tests are all lockless; this is OK since all of them
|
||||
* will be re-checked with proper locks held further down the line.
|
||||
*/
|
||||
if (!cpu_active(arg.src_cpu) || !cpu_active(arg.dst_cpu))
|
||||
goto out;
|
||||
|
||||
if (!cpumask_test_cpu(arg.dst_cpu, tsk_cpus_allowed(arg.src_task)))
|
||||
goto out;
|
||||
|
||||
if (!cpumask_test_cpu(arg.src_cpu, tsk_cpus_allowed(arg.dst_task)))
|
||||
goto out;
|
||||
|
||||
ret = stop_two_cpus(arg.dst_cpu, arg.src_cpu, migrate_swap_stop, &arg);
|
||||
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct migration_arg {
|
||||
struct task_struct *task;
|
||||
int dest_cpu;
|
||||
@@ -1236,9 +1333,9 @@ out:
|
||||
* The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable.
|
||||
*/
|
||||
static inline
|
||||
int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
|
||||
int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
|
||||
{
|
||||
int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags);
|
||||
cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
|
||||
|
||||
/*
|
||||
* In order not to call set_task_cpu() on a blocking task we need
|
||||
@@ -1330,12 +1427,13 @@ ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
|
||||
|
||||
if (rq->idle_stamp) {
|
||||
u64 delta = rq_clock(rq) - rq->idle_stamp;
|
||||
u64 max = 2*sysctl_sched_migration_cost;
|
||||
u64 max = 2*rq->max_idle_balance_cost;
|
||||
|
||||
if (delta > max)
|
||||
update_avg(&rq->avg_idle, delta);
|
||||
|
||||
if (rq->avg_idle > max)
|
||||
rq->avg_idle = max;
|
||||
else
|
||||
update_avg(&rq->avg_idle, delta);
|
||||
|
||||
rq->idle_stamp = 0;
|
||||
}
|
||||
#endif
|
||||
@@ -1396,6 +1494,14 @@ static void sched_ttwu_pending(void)
|
||||
|
||||
void scheduler_ipi(void)
|
||||
{
|
||||
/*
|
||||
* Fold TIF_NEED_RESCHED into the preempt_count; anybody setting
|
||||
* TIF_NEED_RESCHED remotely (for the first time) will also send
|
||||
* this IPI.
|
||||
*/
|
||||
if (tif_need_resched())
|
||||
set_preempt_need_resched();
|
||||
|
||||
if (llist_empty(&this_rq()->wake_list)
|
||||
&& !tick_nohz_full_cpu(smp_processor_id())
|
||||
&& !got_nohz_idle_kick())
|
||||
@@ -1513,7 +1619,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
|
||||
if (p->sched_class->task_waking)
|
||||
p->sched_class->task_waking(p);
|
||||
|
||||
cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
|
||||
cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
|
||||
if (task_cpu(p) != cpu) {
|
||||
wake_flags |= WF_MIGRATED;
|
||||
set_task_cpu(p, cpu);
|
||||
@@ -1595,7 +1701,7 @@ int wake_up_state(struct task_struct *p, unsigned int state)
|
||||
*
|
||||
* __sched_fork() is basic setup used by init_idle() too:
|
||||
*/
|
||||
static void __sched_fork(struct task_struct *p)
|
||||
static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
|
||||
{
|
||||
p->on_rq = 0;
|
||||
|
||||
@@ -1619,16 +1725,24 @@ static void __sched_fork(struct task_struct *p)
|
||||
|
||||
#ifdef CONFIG_NUMA_BALANCING
|
||||
if (p->mm && atomic_read(&p->mm->mm_users) == 1) {
|
||||
p->mm->numa_next_scan = jiffies;
|
||||
p->mm->numa_next_reset = jiffies;
|
||||
p->mm->numa_next_scan = jiffies + msecs_to_jiffies(sysctl_numa_balancing_scan_delay);
|
||||
p->mm->numa_scan_seq = 0;
|
||||
}
|
||||
|
||||
if (clone_flags & CLONE_VM)
|
||||
p->numa_preferred_nid = current->numa_preferred_nid;
|
||||
else
|
||||
p->numa_preferred_nid = -1;
|
||||
|
||||
p->node_stamp = 0ULL;
|
||||
p->numa_scan_seq = p->mm ? p->mm->numa_scan_seq : 0;
|
||||
p->numa_migrate_seq = p->mm ? p->mm->numa_scan_seq - 1 : 0;
|
||||
p->numa_scan_period = sysctl_numa_balancing_scan_delay;
|
||||
p->numa_work.next = &p->numa_work;
|
||||
p->numa_faults = NULL;
|
||||
p->numa_faults_buffer = NULL;
|
||||
|
||||
INIT_LIST_HEAD(&p->numa_entry);
|
||||
p->numa_group = NULL;
|
||||
#endif /* CONFIG_NUMA_BALANCING */
|
||||
}
|
||||
|
||||
@@ -1654,12 +1768,12 @@ void set_numabalancing_state(bool enabled)
|
||||
/*
|
||||
* fork()/clone()-time setup:
|
||||
*/
|
||||
void sched_fork(struct task_struct *p)
|
||||
void sched_fork(unsigned long clone_flags, struct task_struct *p)
|
||||
{
|
||||
unsigned long flags;
|
||||
int cpu = get_cpu();
|
||||
|
||||
__sched_fork(p);
|
||||
__sched_fork(clone_flags, p);
|
||||
/*
|
||||
* We mark the process as running here. This guarantees that
|
||||
* nobody will actually run it, and a signal or other external
|
||||
@@ -1717,10 +1831,7 @@ void sched_fork(struct task_struct *p)
|
||||
#if defined(CONFIG_SMP)
|
||||
p->on_cpu = 0;
|
||||
#endif
|
||||
#ifdef CONFIG_PREEMPT_COUNT
|
||||
/* Want to start with kernel preemption disabled. */
|
||||
task_thread_info(p)->preempt_count = 1;
|
||||
#endif
|
||||
init_task_preempt_count(p);
|
||||
#ifdef CONFIG_SMP
|
||||
plist_node_init(&p->pushable_tasks, MAX_PRIO);
|
||||
#endif
|
||||
@@ -1747,7 +1858,7 @@ void wake_up_new_task(struct task_struct *p)
|
||||
* - cpus_allowed can change in the fork path
|
||||
* - any previously selected cpu might disappear through hotplug
|
||||
*/
|
||||
set_task_cpu(p, select_task_rq(p, SD_BALANCE_FORK, 0));
|
||||
set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
|
||||
#endif
|
||||
|
||||
/* Initialize new task's runnable average */
|
||||
@@ -1838,7 +1949,7 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
|
||||
struct task_struct *next)
|
||||
{
|
||||
trace_sched_switch(prev, next);
|
||||
sched_info_switch(prev, next);
|
||||
sched_info_switch(rq, prev, next);
|
||||
perf_event_task_sched_out(prev, next);
|
||||
fire_sched_out_preempt_notifiers(prev, next);
|
||||
prepare_lock_switch(rq, next);
|
||||
@@ -1890,6 +2001,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
|
||||
if (mm)
|
||||
mmdrop(mm);
|
||||
if (unlikely(prev_state == TASK_DEAD)) {
|
||||
task_numa_free(prev);
|
||||
|
||||
/*
|
||||
* Remove function-return probe instances associated with this
|
||||
* task and put them back on the free list.
|
||||
@@ -2073,7 +2186,7 @@ void sched_exec(void)
|
||||
int dest_cpu;
|
||||
|
||||
raw_spin_lock_irqsave(&p->pi_lock, flags);
|
||||
dest_cpu = p->sched_class->select_task_rq(p, SD_BALANCE_EXEC, 0);
|
||||
dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), SD_BALANCE_EXEC, 0);
|
||||
if (dest_cpu == smp_processor_id())
|
||||
goto unlock;
|
||||
|
||||
@@ -2215,7 +2328,7 @@ notrace unsigned long get_parent_ip(unsigned long addr)
|
||||
#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
|
||||
defined(CONFIG_PREEMPT_TRACER))
|
||||
|
||||
void __kprobes add_preempt_count(int val)
|
||||
void __kprobes preempt_count_add(int val)
|
||||
{
|
||||
#ifdef CONFIG_DEBUG_PREEMPT
|
||||
/*
|
||||
@@ -2224,7 +2337,7 @@ void __kprobes add_preempt_count(int val)
|
||||
if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0)))
|
||||
return;
|
||||
#endif
|
||||
preempt_count() += val;
|
||||
__preempt_count_add(val);
|
||||
#ifdef CONFIG_DEBUG_PREEMPT
|
||||
/*
|
||||
* Spinlock count overflowing soon?
|
||||
@@ -2235,9 +2348,9 @@ void __kprobes add_preempt_count(int val)
|
||||
if (preempt_count() == val)
|
||||
trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
|
||||
}
|
||||
EXPORT_SYMBOL(add_preempt_count);
|
||||
EXPORT_SYMBOL(preempt_count_add);
|
||||
|
||||
void __kprobes sub_preempt_count(int val)
|
||||
void __kprobes preempt_count_sub(int val)
|
||||
{
|
||||
#ifdef CONFIG_DEBUG_PREEMPT
|
||||
/*
|
||||
@@ -2255,9 +2368,9 @@ void __kprobes sub_preempt_count(int val)
|
||||
|
||||
if (preempt_count() == val)
|
||||
trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
|
||||
preempt_count() -= val;
|
||||
__preempt_count_sub(val);
|
||||
}
|
||||
EXPORT_SYMBOL(sub_preempt_count);
|
||||
EXPORT_SYMBOL(preempt_count_sub);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -2430,6 +2543,7 @@ need_resched:
|
||||
put_prev_task(rq, prev);
|
||||
next = pick_next_task(rq);
|
||||
clear_tsk_need_resched(prev);
|
||||
clear_preempt_need_resched();
|
||||
rq->skip_clock_update = 0;
|
||||
|
||||
if (likely(prev != next)) {
|
||||
@@ -2520,9 +2634,9 @@ asmlinkage void __sched notrace preempt_schedule(void)
|
||||
return;
|
||||
|
||||
do {
|
||||
add_preempt_count_notrace(PREEMPT_ACTIVE);
|
||||
__preempt_count_add(PREEMPT_ACTIVE);
|
||||
__schedule();
|
||||
sub_preempt_count_notrace(PREEMPT_ACTIVE);
|
||||
__preempt_count_sub(PREEMPT_ACTIVE);
|
||||
|
||||
/*
|
||||
* Check again in case we missed a preemption opportunity
|
||||
@@ -2541,20 +2655,19 @@ EXPORT_SYMBOL(preempt_schedule);
|
||||
*/
|
||||
asmlinkage void __sched preempt_schedule_irq(void)
|
||||
{
|
||||
struct thread_info *ti = current_thread_info();
|
||||
enum ctx_state prev_state;
|
||||
|
||||
/* Catch callers which need to be fixed */
|
||||
BUG_ON(ti->preempt_count || !irqs_disabled());
|
||||
BUG_ON(preempt_count() || !irqs_disabled());
|
||||
|
||||
prev_state = exception_enter();
|
||||
|
||||
do {
|
||||
add_preempt_count(PREEMPT_ACTIVE);
|
||||
__preempt_count_add(PREEMPT_ACTIVE);
|
||||
local_irq_enable();
|
||||
__schedule();
|
||||
local_irq_disable();
|
||||
sub_preempt_count(PREEMPT_ACTIVE);
|
||||
__preempt_count_sub(PREEMPT_ACTIVE);
|
||||
|
||||
/*
|
||||
* Check again in case we missed a preemption opportunity
|
||||
@@ -2575,393 +2688,6 @@ int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags,
|
||||
}
|
||||
EXPORT_SYMBOL(default_wake_function);
|
||||
|
||||
/*
|
||||
* The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just
|
||||
* wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve
|
||||
* number) then we wake all the non-exclusive tasks and one exclusive task.
|
||||
*
|
||||
* There are circumstances in which we can try to wake a task which has already
|
||||
* started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
|
||||
* zero in this (rare) case, and we handle it by continuing to scan the queue.
|
||||
*/
|
||||
static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
|
||||
int nr_exclusive, int wake_flags, void *key)
|
||||
{
|
||||
wait_queue_t *curr, *next;
|
||||
|
||||
list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
|
||||
unsigned flags = curr->flags;
|
||||
|
||||
if (curr->func(curr, mode, wake_flags, key) &&
|
||||
(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* __wake_up - wake up threads blocked on a waitqueue.
|
||||
* @q: the waitqueue
|
||||
* @mode: which threads
|
||||
* @nr_exclusive: how many wake-one or wake-many threads to wake up
|
||||
* @key: is directly passed to the wakeup function
|
||||
*
|
||||
* It may be assumed that this function implies a write memory barrier before
|
||||
* changing the task state if and only if any tasks are woken up.
|
||||
*/
|
||||
void __wake_up(wait_queue_head_t *q, unsigned int mode,
|
||||
int nr_exclusive, void *key)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&q->lock, flags);
|
||||
__wake_up_common(q, mode, nr_exclusive, 0, key);
|
||||
spin_unlock_irqrestore(&q->lock, flags);
|
||||
}
|
||||
EXPORT_SYMBOL(__wake_up);
|
||||
|
||||
/*
|
||||
* Same as __wake_up but called with the spinlock in wait_queue_head_t held.
|
||||
*/
|
||||
void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr)
|
||||
{
|
||||
__wake_up_common(q, mode, nr, 0, NULL);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__wake_up_locked);
|
||||
|
||||
void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
|
||||
{
|
||||
__wake_up_common(q, mode, 1, 0, key);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__wake_up_locked_key);
|
||||
|
||||
/**
|
||||
* __wake_up_sync_key - wake up threads blocked on a waitqueue.
|
||||
* @q: the waitqueue
|
||||
* @mode: which threads
|
||||
* @nr_exclusive: how many wake-one or wake-many threads to wake up
|
||||
* @key: opaque value to be passed to wakeup targets
|
||||
*
|
||||
* The sync wakeup differs in that the waker knows that it will schedule
|
||||
* away soon, so while the target thread will be woken up, it will not
|
||||
* be migrated to another CPU - ie. the two threads are 'synchronized'
|
||||
* with each other. This can prevent needless bouncing between CPUs.
|
||||
*
|
||||
* On UP it can prevent extra preemption.
|
||||
*
|
||||
* It may be assumed that this function implies a write memory barrier before
|
||||
* changing the task state if and only if any tasks are woken up.
|
||||
*/
|
||||
void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
|
||||
int nr_exclusive, void *key)
|
||||
{
|
||||
unsigned long flags;
|
||||
int wake_flags = WF_SYNC;
|
||||
|
||||
if (unlikely(!q))
|
||||
return;
|
||||
|
||||
if (unlikely(nr_exclusive != 1))
|
||||
wake_flags = 0;
|
||||
|
||||
spin_lock_irqsave(&q->lock, flags);
|
||||
__wake_up_common(q, mode, nr_exclusive, wake_flags, key);
|
||||
spin_unlock_irqrestore(&q->lock, flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__wake_up_sync_key);
|
||||
|
||||
/*
|
||||
* __wake_up_sync - see __wake_up_sync_key()
|
||||
*/
|
||||
void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
|
||||
{
|
||||
__wake_up_sync_key(q, mode, nr_exclusive, NULL);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */
|
||||
|
||||
/**
|
||||
* complete: - signals a single thread waiting on this completion
|
||||
* @x: holds the state of this particular completion
|
||||
*
|
||||
* This will wake up a single thread waiting on this completion. Threads will be
|
||||
* awakened in the same order in which they were queued.
|
||||
*
|
||||
* See also complete_all(), wait_for_completion() and related routines.
|
||||
*
|
||||
* It may be assumed that this function implies a write memory barrier before
|
||||
* changing the task state if and only if any tasks are woken up.
|
||||
*/
|
||||
void complete(struct completion *x)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&x->wait.lock, flags);
|
||||
x->done++;
|
||||
__wake_up_common(&x->wait, TASK_NORMAL, 1, 0, NULL);
|
||||
spin_unlock_irqrestore(&x->wait.lock, flags);
|
||||
}
|
||||
EXPORT_SYMBOL(complete);
|
||||
|
||||
/**
|
||||
* complete_all: - signals all threads waiting on this completion
|
||||
* @x: holds the state of this particular completion
|
||||
*
|
||||
* This will wake up all threads waiting on this particular completion event.
|
||||
*
|
||||
* It may be assumed that this function implies a write memory barrier before
|
||||
* changing the task state if and only if any tasks are woken up.
|
||||
*/
|
||||
void complete_all(struct completion *x)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&x->wait.lock, flags);
|
||||
x->done += UINT_MAX/2;
|
||||
__wake_up_common(&x->wait, TASK_NORMAL, 0, 0, NULL);
|
||||
spin_unlock_irqrestore(&x->wait.lock, flags);
|
||||
}
|
||||
EXPORT_SYMBOL(complete_all);
|
||||
|
||||
static inline long __sched
|
||||
do_wait_for_common(struct completion *x,
|
||||
long (*action)(long), long timeout, int state)
|
||||
{
|
||||
if (!x->done) {
|
||||
DECLARE_WAITQUEUE(wait, current);
|
||||
|
||||
__add_wait_queue_tail_exclusive(&x->wait, &wait);
|
||||
do {
|
||||
if (signal_pending_state(state, current)) {
|
||||
timeout = -ERESTARTSYS;
|
||||
break;
|
||||
}
|
||||
__set_current_state(state);
|
||||
spin_unlock_irq(&x->wait.lock);
|
||||
timeout = action(timeout);
|
||||
spin_lock_irq(&x->wait.lock);
|
||||
} while (!x->done && timeout);
|
||||
__remove_wait_queue(&x->wait, &wait);
|
||||
if (!x->done)
|
||||
return timeout;
|
||||
}
|
||||
x->done--;
|
||||
return timeout ?: 1;
|
||||
}
|
||||
|
||||
static inline long __sched
|
||||
__wait_for_common(struct completion *x,
|
||||
long (*action)(long), long timeout, int state)
|
||||
{
|
||||
might_sleep();
|
||||
|
||||
spin_lock_irq(&x->wait.lock);
|
||||
timeout = do_wait_for_common(x, action, timeout, state);
|
||||
spin_unlock_irq(&x->wait.lock);
|
||||
return timeout;
|
||||
}
|
||||
|
||||
static long __sched
|
||||
wait_for_common(struct completion *x, long timeout, int state)
|
||||
{
|
||||
return __wait_for_common(x, schedule_timeout, timeout, state);
|
||||
}
|
||||
|
||||
static long __sched
|
||||
wait_for_common_io(struct completion *x, long timeout, int state)
|
||||
{
|
||||
return __wait_for_common(x, io_schedule_timeout, timeout, state);
|
||||
}
|
||||
|
||||
/**
|
||||
* wait_for_completion: - waits for completion of a task
|
||||
* @x: holds the state of this particular completion
|
||||
*
|
||||
* This waits to be signaled for completion of a specific task. It is NOT
|
||||
* interruptible and there is no timeout.
|
||||
*
|
||||
* See also similar routines (i.e. wait_for_completion_timeout()) with timeout
|
||||
* and interrupt capability. Also see complete().
|
||||
*/
|
||||
void __sched wait_for_completion(struct completion *x)
|
||||
{
|
||||
wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
EXPORT_SYMBOL(wait_for_completion);
|
||||
|
||||
/**
|
||||
* wait_for_completion_timeout: - waits for completion of a task (w/timeout)
|
||||
* @x: holds the state of this particular completion
|
||||
* @timeout: timeout value in jiffies
|
||||
*
|
||||
* This waits for either a completion of a specific task to be signaled or for a
|
||||
* specified timeout to expire. The timeout is in jiffies. It is not
|
||||
* interruptible.
|
||||
*
|
||||
* Return: 0 if timed out, and positive (at least 1, or number of jiffies left
|
||||
* till timeout) if completed.
|
||||
*/
|
||||
unsigned long __sched
|
||||
wait_for_completion_timeout(struct completion *x, unsigned long timeout)
|
||||
{
|
||||
return wait_for_common(x, timeout, TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
EXPORT_SYMBOL(wait_for_completion_timeout);
|
||||
|
||||
/**
|
||||
* wait_for_completion_io: - waits for completion of a task
|
||||
* @x: holds the state of this particular completion
|
||||
*
|
||||
* This waits to be signaled for completion of a specific task. It is NOT
|
||||
* interruptible and there is no timeout. The caller is accounted as waiting
|
||||
* for IO.
|
||||
*/
|
||||
void __sched wait_for_completion_io(struct completion *x)
|
||||
{
|
||||
wait_for_common_io(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
EXPORT_SYMBOL(wait_for_completion_io);
|
||||
|
||||
/**
|
||||
* wait_for_completion_io_timeout: - waits for completion of a task (w/timeout)
|
||||
* @x: holds the state of this particular completion
|
||||
* @timeout: timeout value in jiffies
|
||||
*
|
||||
* This waits for either a completion of a specific task to be signaled or for a
|
||||
* specified timeout to expire. The timeout is in jiffies. It is not
|
||||
* interruptible. The caller is accounted as waiting for IO.
|
||||
*
|
||||
* Return: 0 if timed out, and positive (at least 1, or number of jiffies left
|
||||
* till timeout) if completed.
|
||||
*/
|
||||
unsigned long __sched
|
||||
wait_for_completion_io_timeout(struct completion *x, unsigned long timeout)
|
||||
{
|
||||
return wait_for_common_io(x, timeout, TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
EXPORT_SYMBOL(wait_for_completion_io_timeout);
|
||||
|
||||
/**
|
||||
* wait_for_completion_interruptible: - waits for completion of a task (w/intr)
|
||||
* @x: holds the state of this particular completion
|
||||
*
|
||||
* This waits for completion of a specific task to be signaled. It is
|
||||
* interruptible.
|
||||
*
|
||||
* Return: -ERESTARTSYS if interrupted, 0 if completed.
|
||||
*/
|
||||
int __sched wait_for_completion_interruptible(struct completion *x)
|
||||
{
|
||||
long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE);
|
||||
if (t == -ERESTARTSYS)
|
||||
return t;
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(wait_for_completion_interruptible);
|
||||
|
||||
/**
|
||||
* wait_for_completion_interruptible_timeout: - waits for completion (w/(to,intr))
|
||||
* @x: holds the state of this particular completion
|
||||
* @timeout: timeout value in jiffies
|
||||
*
|
||||
* This waits for either a completion of a specific task to be signaled or for a
|
||||
* specified timeout to expire. It is interruptible. The timeout is in jiffies.
|
||||
*
|
||||
* Return: -ERESTARTSYS if interrupted, 0 if timed out, positive (at least 1,
|
||||
* or number of jiffies left till timeout) if completed.
|
||||
*/
|
||||
long __sched
|
||||
wait_for_completion_interruptible_timeout(struct completion *x,
|
||||
unsigned long timeout)
|
||||
{
|
||||
return wait_for_common(x, timeout, TASK_INTERRUPTIBLE);
|
||||
}
|
||||
EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
|
||||
|
||||
/**
|
||||
* wait_for_completion_killable: - waits for completion of a task (killable)
|
||||
* @x: holds the state of this particular completion
|
||||
*
|
||||
* This waits to be signaled for completion of a specific task. It can be
|
||||
* interrupted by a kill signal.
|
||||
*
|
||||
* Return: -ERESTARTSYS if interrupted, 0 if completed.
|
||||
*/
|
||||
int __sched wait_for_completion_killable(struct completion *x)
|
||||
{
|
||||
long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE);
|
||||
if (t == -ERESTARTSYS)
|
||||
return t;
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(wait_for_completion_killable);
|
||||
|
||||
/**
|
||||
* wait_for_completion_killable_timeout: - waits for completion of a task (w/(to,killable))
|
||||
* @x: holds the state of this particular completion
|
||||
* @timeout: timeout value in jiffies
|
||||
*
|
||||
* This waits for either a completion of a specific task to be
|
||||
* signaled or for a specified timeout to expire. It can be
|
||||
* interrupted by a kill signal. The timeout is in jiffies.
|
||||
*
|
||||
* Return: -ERESTARTSYS if interrupted, 0 if timed out, positive (at least 1,
|
||||
* or number of jiffies left till timeout) if completed.
|
||||
*/
|
||||
long __sched
|
||||
wait_for_completion_killable_timeout(struct completion *x,
|
||||
unsigned long timeout)
|
||||
{
|
||||
return wait_for_common(x, timeout, TASK_KILLABLE);
|
||||
}
|
||||
EXPORT_SYMBOL(wait_for_completion_killable_timeout);
|
||||
|
||||
/**
|
||||
* try_wait_for_completion - try to decrement a completion without blocking
|
||||
* @x: completion structure
|
||||
*
|
||||
* Return: 0 if a decrement cannot be done without blocking
|
||||
* 1 if a decrement succeeded.
|
||||
*
|
||||
* If a completion is being used as a counting completion,
|
||||
* attempt to decrement the counter without blocking. This
|
||||
* enables us to avoid waiting if the resource the completion
|
||||
* is protecting is not available.
|
||||
*/
|
||||
bool try_wait_for_completion(struct completion *x)
|
||||
{
|
||||
unsigned long flags;
|
||||
int ret = 1;
|
||||
|
||||
spin_lock_irqsave(&x->wait.lock, flags);
|
||||
if (!x->done)
|
||||
ret = 0;
|
||||
else
|
||||
x->done--;
|
||||
spin_unlock_irqrestore(&x->wait.lock, flags);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(try_wait_for_completion);
|
||||
|
||||
/**
|
||||
* completion_done - Test to see if a completion has any waiters
|
||||
* @x: completion structure
|
||||
*
|
||||
* Return: 0 if there are waiters (wait_for_completion() in progress)
|
||||
* 1 if there are no waiters.
|
||||
*
|
||||
*/
|
||||
bool completion_done(struct completion *x)
|
||||
{
|
||||
unsigned long flags;
|
||||
int ret = 1;
|
||||
|
||||
spin_lock_irqsave(&x->wait.lock, flags);
|
||||
if (!x->done)
|
||||
ret = 0;
|
||||
spin_unlock_irqrestore(&x->wait.lock, flags);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(completion_done);
|
||||
|
||||
static long __sched
|
||||
sleep_on_common(wait_queue_head_t *q, int state, long timeout)
|
||||
{
|
||||
@@ -3598,13 +3324,11 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
|
||||
struct task_struct *p;
|
||||
int retval;
|
||||
|
||||
get_online_cpus();
|
||||
rcu_read_lock();
|
||||
|
||||
p = find_process_by_pid(pid);
|
||||
if (!p) {
|
||||
rcu_read_unlock();
|
||||
put_online_cpus();
|
||||
return -ESRCH;
|
||||
}
|
||||
|
||||
@@ -3661,7 +3385,6 @@ out_free_cpus_allowed:
|
||||
free_cpumask_var(cpus_allowed);
|
||||
out_put_task:
|
||||
put_task_struct(p);
|
||||
put_online_cpus();
|
||||
return retval;
|
||||
}
|
||||
|
||||
@@ -3706,7 +3429,6 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
|
||||
unsigned long flags;
|
||||
int retval;
|
||||
|
||||
get_online_cpus();
|
||||
rcu_read_lock();
|
||||
|
||||
retval = -ESRCH;
|
||||
@@ -3719,12 +3441,11 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
|
||||
goto out_unlock;
|
||||
|
||||
raw_spin_lock_irqsave(&p->pi_lock, flags);
|
||||
cpumask_and(mask, &p->cpus_allowed, cpu_online_mask);
|
||||
cpumask_and(mask, &p->cpus_allowed, cpu_active_mask);
|
||||
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
|
||||
|
||||
out_unlock:
|
||||
rcu_read_unlock();
|
||||
put_online_cpus();
|
||||
|
||||
return retval;
|
||||
}
|
||||
@@ -3794,16 +3515,11 @@ SYSCALL_DEFINE0(sched_yield)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int should_resched(void)
|
||||
{
|
||||
return need_resched() && !(preempt_count() & PREEMPT_ACTIVE);
|
||||
}
|
||||
|
||||
static void __cond_resched(void)
|
||||
{
|
||||
add_preempt_count(PREEMPT_ACTIVE);
|
||||
__preempt_count_add(PREEMPT_ACTIVE);
|
||||
__schedule();
|
||||
sub_preempt_count(PREEMPT_ACTIVE);
|
||||
__preempt_count_sub(PREEMPT_ACTIVE);
|
||||
}
|
||||
|
||||
int __sched _cond_resched(void)
|
||||
@@ -4186,7 +3902,7 @@ void init_idle(struct task_struct *idle, int cpu)
|
||||
|
||||
raw_spin_lock_irqsave(&rq->lock, flags);
|
||||
|
||||
__sched_fork(idle);
|
||||
__sched_fork(0, idle);
|
||||
idle->state = TASK_RUNNING;
|
||||
idle->se.exec_start = sched_clock();
|
||||
|
||||
@@ -4212,7 +3928,7 @@ void init_idle(struct task_struct *idle, int cpu)
|
||||
raw_spin_unlock_irqrestore(&rq->lock, flags);
|
||||
|
||||
/* Set the preempt count _outside_ the spinlocks! */
|
||||
task_thread_info(idle)->preempt_count = 0;
|
||||
init_idle_preempt_count(idle, cpu);
|
||||
|
||||
/*
|
||||
* The idle tasks have their own, simple scheduling class:
|
||||
@@ -4346,6 +4062,53 @@ fail:
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NUMA_BALANCING
|
||||
/* Migrate current task p to target_cpu */
|
||||
int migrate_task_to(struct task_struct *p, int target_cpu)
|
||||
{
|
||||
struct migration_arg arg = { p, target_cpu };
|
||||
int curr_cpu = task_cpu(p);
|
||||
|
||||
if (curr_cpu == target_cpu)
|
||||
return 0;
|
||||
|
||||
if (!cpumask_test_cpu(target_cpu, tsk_cpus_allowed(p)))
|
||||
return -EINVAL;
|
||||
|
||||
/* TODO: This is not properly updating schedstats */
|
||||
|
||||
return stop_one_cpu(curr_cpu, migration_cpu_stop, &arg);
|
||||
}
|
||||
|
||||
/*
|
||||
* Requeue a task on a given node and accurately track the number of NUMA
|
||||
* tasks on the runqueues
|
||||
*/
|
||||
void sched_setnuma(struct task_struct *p, int nid)
|
||||
{
|
||||
struct rq *rq;
|
||||
unsigned long flags;
|
||||
bool on_rq, running;
|
||||
|
||||
rq = task_rq_lock(p, &flags);
|
||||
on_rq = p->on_rq;
|
||||
running = task_current(rq, p);
|
||||
|
||||
if (on_rq)
|
||||
dequeue_task(rq, p, 0);
|
||||
if (running)
|
||||
p->sched_class->put_prev_task(rq, p);
|
||||
|
||||
p->numa_preferred_nid = nid;
|
||||
|
||||
if (running)
|
||||
p->sched_class->set_curr_task(rq);
|
||||
if (on_rq)
|
||||
enqueue_task(rq, p, 0);
|
||||
task_rq_unlock(rq, p, &flags);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* migration_cpu_stop - this will be executed by a highprio stopper thread
|
||||
* and performs thread migration by bumping thread off CPU then
|
||||
@@ -5119,6 +4882,9 @@ static void destroy_sched_domains(struct sched_domain *sd, int cpu)
|
||||
DEFINE_PER_CPU(struct sched_domain *, sd_llc);
|
||||
DEFINE_PER_CPU(int, sd_llc_size);
|
||||
DEFINE_PER_CPU(int, sd_llc_id);
|
||||
DEFINE_PER_CPU(struct sched_domain *, sd_numa);
|
||||
DEFINE_PER_CPU(struct sched_domain *, sd_busy);
|
||||
DEFINE_PER_CPU(struct sched_domain *, sd_asym);
|
||||
|
||||
static void update_top_cache_domain(int cpu)
|
||||
{
|
||||
@@ -5130,11 +4896,18 @@ static void update_top_cache_domain(int cpu)
|
||||
if (sd) {
|
||||
id = cpumask_first(sched_domain_span(sd));
|
||||
size = cpumask_weight(sched_domain_span(sd));
|
||||
rcu_assign_pointer(per_cpu(sd_busy, cpu), sd->parent);
|
||||
}
|
||||
|
||||
rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
|
||||
per_cpu(sd_llc_size, cpu) = size;
|
||||
per_cpu(sd_llc_id, cpu) = id;
|
||||
|
||||
sd = lowest_flag_domain(cpu, SD_NUMA);
|
||||
rcu_assign_pointer(per_cpu(sd_numa, cpu), sd);
|
||||
|
||||
sd = highest_flag_domain(cpu, SD_ASYM_PACKING);
|
||||
rcu_assign_pointer(per_cpu(sd_asym, cpu), sd);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -5654,6 +5427,7 @@ sd_numa_init(struct sched_domain_topology_level *tl, int cpu)
|
||||
| 0*SD_SHARE_PKG_RESOURCES
|
||||
| 1*SD_SERIALIZE
|
||||
| 0*SD_PREFER_SIBLING
|
||||
| 1*SD_NUMA
|
||||
| sd_local_flags(level)
|
||||
,
|
||||
.last_balance = jiffies,
|
||||
@@ -6335,14 +6109,17 @@ void __init sched_init_smp(void)
|
||||
|
||||
sched_init_numa();
|
||||
|
||||
get_online_cpus();
|
||||
/*
|
||||
* There's no userspace yet to cause hotplug operations; hence all the
|
||||
* cpu masks are stable and all blatant races in the below code cannot
|
||||
* happen.
|
||||
*/
|
||||
mutex_lock(&sched_domains_mutex);
|
||||
init_sched_domains(cpu_active_mask);
|
||||
cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
|
||||
if (cpumask_empty(non_isolated_cpus))
|
||||
cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
|
||||
mutex_unlock(&sched_domains_mutex);
|
||||
put_online_cpus();
|
||||
|
||||
hotcpu_notifier(sched_domains_numa_masks_update, CPU_PRI_SCHED_ACTIVE);
|
||||
hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE);
|
||||
@@ -6505,6 +6282,7 @@ void __init sched_init(void)
|
||||
rq->online = 0;
|
||||
rq->idle_stamp = 0;
|
||||
rq->avg_idle = 2*sysctl_sched_migration_cost;
|
||||
rq->max_idle_balance_cost = sysctl_sched_migration_cost;
|
||||
|
||||
INIT_LIST_HEAD(&rq->cfs_tasks);
|
||||
|
||||
@@ -7277,7 +7055,12 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
|
||||
|
||||
runtime_enabled = quota != RUNTIME_INF;
|
||||
runtime_was_enabled = cfs_b->quota != RUNTIME_INF;
|
||||
account_cfs_bandwidth_used(runtime_enabled, runtime_was_enabled);
|
||||
/*
|
||||
* If we need to toggle cfs_bandwidth_used, off->on must occur
|
||||
* before making related changes, and on->off must occur afterwards
|
||||
*/
|
||||
if (runtime_enabled && !runtime_was_enabled)
|
||||
cfs_bandwidth_usage_inc();
|
||||
raw_spin_lock_irq(&cfs_b->lock);
|
||||
cfs_b->period = ns_to_ktime(period);
|
||||
cfs_b->quota = quota;
|
||||
@@ -7303,6 +7086,8 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
|
||||
unthrottle_cfs_rq(cfs_rq);
|
||||
raw_spin_unlock_irq(&rq->lock);
|
||||
}
|
||||
if (runtime_was_enabled && !runtime_enabled)
|
||||
cfs_bandwidth_usage_dec();
|
||||
out_unlock:
|
||||
mutex_unlock(&cfs_constraints_mutex);
|
||||
|
||||
|
@@ -15,6 +15,7 @@
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/kallsyms.h>
|
||||
#include <linux/utsname.h>
|
||||
#include <linux/mempolicy.h>
|
||||
|
||||
#include "sched.h"
|
||||
|
||||
@@ -137,6 +138,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
|
||||
SEQ_printf(m, "%15Ld %15Ld %15Ld.%06ld %15Ld.%06ld %15Ld.%06ld",
|
||||
0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L);
|
||||
#endif
|
||||
#ifdef CONFIG_NUMA_BALANCING
|
||||
SEQ_printf(m, " %d", cpu_to_node(task_cpu(p)));
|
||||
#endif
|
||||
#ifdef CONFIG_CGROUP_SCHED
|
||||
SEQ_printf(m, " %s", task_group_path(task_group(p)));
|
||||
#endif
|
||||
@@ -159,7 +163,7 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
|
||||
read_lock_irqsave(&tasklist_lock, flags);
|
||||
|
||||
do_each_thread(g, p) {
|
||||
if (!p->on_rq || task_cpu(p) != rq_cpu)
|
||||
if (task_cpu(p) != rq_cpu)
|
||||
continue;
|
||||
|
||||
print_task(m, rq, p);
|
||||
@@ -225,6 +229,14 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
|
||||
atomic_read(&cfs_rq->tg->runnable_avg));
|
||||
#endif
|
||||
#endif
|
||||
#ifdef CONFIG_CFS_BANDWIDTH
|
||||
SEQ_printf(m, " .%-30s: %d\n", "tg->cfs_bandwidth.timer_active",
|
||||
cfs_rq->tg->cfs_bandwidth.timer_active);
|
||||
SEQ_printf(m, " .%-30s: %d\n", "throttled",
|
||||
cfs_rq->throttled);
|
||||
SEQ_printf(m, " .%-30s: %d\n", "throttle_count",
|
||||
cfs_rq->throttle_count);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
print_cfs_group_stats(m, cpu, cfs_rq->tg);
|
||||
@@ -345,7 +357,7 @@ static void sched_debug_header(struct seq_file *m)
|
||||
cpu_clk = local_clock();
|
||||
local_irq_restore(flags);
|
||||
|
||||
SEQ_printf(m, "Sched Debug Version: v0.10, %s %.*s\n",
|
||||
SEQ_printf(m, "Sched Debug Version: v0.11, %s %.*s\n",
|
||||
init_utsname()->release,
|
||||
(int)strcspn(init_utsname()->version, " "),
|
||||
init_utsname()->version);
|
||||
@@ -488,6 +500,56 @@ static int __init init_sched_debug_procfs(void)
|
||||
|
||||
__initcall(init_sched_debug_procfs);
|
||||
|
||||
#define __P(F) \
|
||||
SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)F)
|
||||
#define P(F) \
|
||||
SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)p->F)
|
||||
#define __PN(F) \
|
||||
SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
|
||||
#define PN(F) \
|
||||
SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
|
||||
|
||||
|
||||
static void sched_show_numa(struct task_struct *p, struct seq_file *m)
|
||||
{
|
||||
#ifdef CONFIG_NUMA_BALANCING
|
||||
struct mempolicy *pol;
|
||||
int node, i;
|
||||
|
||||
if (p->mm)
|
||||
P(mm->numa_scan_seq);
|
||||
|
||||
task_lock(p);
|
||||
pol = p->mempolicy;
|
||||
if (pol && !(pol->flags & MPOL_F_MORON))
|
||||
pol = NULL;
|
||||
mpol_get(pol);
|
||||
task_unlock(p);
|
||||
|
||||
SEQ_printf(m, "numa_migrations, %ld\n", xchg(&p->numa_pages_migrated, 0));
|
||||
|
||||
for_each_online_node(node) {
|
||||
for (i = 0; i < 2; i++) {
|
||||
unsigned long nr_faults = -1;
|
||||
int cpu_current, home_node;
|
||||
|
||||
if (p->numa_faults)
|
||||
nr_faults = p->numa_faults[2*node + i];
|
||||
|
||||
cpu_current = !i ? (task_node(p) == node) :
|
||||
(pol && node_isset(node, pol->v.nodes));
|
||||
|
||||
home_node = (p->numa_preferred_nid == node);
|
||||
|
||||
SEQ_printf(m, "numa_faults, %d, %d, %d, %d, %ld\n",
|
||||
i, node, cpu_current, home_node, nr_faults);
|
||||
}
|
||||
}
|
||||
|
||||
mpol_put(pol);
|
||||
#endif
|
||||
}
|
||||
|
||||
void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
|
||||
{
|
||||
unsigned long nr_switches;
|
||||
@@ -591,6 +653,8 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
|
||||
SEQ_printf(m, "%-45s:%21Ld\n",
|
||||
"clock-delta", (long long)(t1-t0));
|
||||
}
|
||||
|
||||
sched_show_numa(p, m);
|
||||
}
|
||||
|
||||
void proc_sched_set_task(struct task_struct *p)
|
||||
|
1405
kernel/sched/fair.c
1405
kernel/sched/fair.c
文件差異過大導致無法顯示
Load Diff
@@ -63,10 +63,23 @@ SCHED_FEAT(LB_MIN, false)
|
||||
/*
|
||||
* Apply the automatic NUMA scheduling policy. Enabled automatically
|
||||
* at runtime if running on a NUMA machine. Can be controlled via
|
||||
* numa_balancing=. Allow PTE scanning to be forced on UMA machines
|
||||
* for debugging the core machinery.
|
||||
* numa_balancing=
|
||||
*/
|
||||
#ifdef CONFIG_NUMA_BALANCING
|
||||
SCHED_FEAT(NUMA, false)
|
||||
SCHED_FEAT(NUMA_FORCE, false)
|
||||
|
||||
/*
|
||||
* NUMA_FAVOUR_HIGHER will favor moving tasks towards nodes where a
|
||||
* higher number of hinting faults are recorded during active load
|
||||
* balancing.
|
||||
*/
|
||||
SCHED_FEAT(NUMA_FAVOUR_HIGHER, true)
|
||||
|
||||
/*
|
||||
* NUMA_RESIST_LOWER will resist moving tasks towards nodes where a
|
||||
* lower number of hinting faults have been recorded. As this has
|
||||
* the potential to prevent a task ever migrating to a new node
|
||||
* due to CPU overload it is disabled by default.
|
||||
*/
|
||||
SCHED_FEAT(NUMA_RESIST_LOWER, false)
|
||||
#endif
|
||||
|
@@ -9,7 +9,7 @@
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static int
|
||||
select_task_rq_idle(struct task_struct *p, int sd_flag, int flags)
|
||||
select_task_rq_idle(struct task_struct *p, int cpu, int sd_flag, int flags)
|
||||
{
|
||||
return task_cpu(p); /* IDLE tasks are never migrated */
|
||||
}
|
||||
|
@@ -246,8 +246,10 @@ static inline void rt_set_overload(struct rq *rq)
|
||||
* if we should look at the mask. It would be a shame
|
||||
* if we looked at the mask, but the mask was not
|
||||
* updated yet.
|
||||
*
|
||||
* Matched by the barrier in pull_rt_task().
|
||||
*/
|
||||
wmb();
|
||||
smp_wmb();
|
||||
atomic_inc(&rq->rd->rto_count);
|
||||
}
|
||||
|
||||
@@ -1169,13 +1171,10 @@ static void yield_task_rt(struct rq *rq)
|
||||
static int find_lowest_rq(struct task_struct *task);
|
||||
|
||||
static int
|
||||
select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
|
||||
select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
|
||||
{
|
||||
struct task_struct *curr;
|
||||
struct rq *rq;
|
||||
int cpu;
|
||||
|
||||
cpu = task_cpu(p);
|
||||
|
||||
if (p->nr_cpus_allowed == 1)
|
||||
goto out;
|
||||
@@ -1213,8 +1212,7 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
|
||||
*/
|
||||
if (curr && unlikely(rt_task(curr)) &&
|
||||
(curr->nr_cpus_allowed < 2 ||
|
||||
curr->prio <= p->prio) &&
|
||||
(p->nr_cpus_allowed > 1)) {
|
||||
curr->prio <= p->prio)) {
|
||||
int target = find_lowest_rq(p);
|
||||
|
||||
if (target != -1)
|
||||
@@ -1630,6 +1628,12 @@ static int pull_rt_task(struct rq *this_rq)
|
||||
if (likely(!rt_overloaded(this_rq)))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Match the barrier in rt_set_overload(); this guarantees that if we
|
||||
* see overloaded we must also see the rto_mask bit.
|
||||
*/
|
||||
smp_rmb();
|
||||
|
||||
for_each_cpu(cpu, this_rq->rd->rto_mask) {
|
||||
if (this_cpu == cpu)
|
||||
continue;
|
||||
@@ -1931,8 +1935,8 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
|
||||
p->rt.time_slice = sched_rr_timeslice;
|
||||
|
||||
/*
|
||||
* Requeue to the end of queue if we (and all of our ancestors) are the
|
||||
* only element on the queue
|
||||
* Requeue to the end of queue if we (and all of our ancestors) are not
|
||||
* the only element on the queue
|
||||
*/
|
||||
for_each_sched_rt_entity(rt_se) {
|
||||
if (rt_se->run_list.prev != rt_se->run_list.next) {
|
||||
|
@@ -6,6 +6,7 @@
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/stop_machine.h>
|
||||
#include <linux/tick.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
#include "cpupri.h"
|
||||
#include "cpuacct.h"
|
||||
@@ -408,6 +409,10 @@ struct rq {
|
||||
* remote CPUs use both these fields when doing load calculation.
|
||||
*/
|
||||
unsigned int nr_running;
|
||||
#ifdef CONFIG_NUMA_BALANCING
|
||||
unsigned int nr_numa_running;
|
||||
unsigned int nr_preferred_running;
|
||||
#endif
|
||||
#define CPU_LOAD_IDX_MAX 5
|
||||
unsigned long cpu_load[CPU_LOAD_IDX_MAX];
|
||||
unsigned long last_load_update_tick;
|
||||
@@ -476,6 +481,9 @@ struct rq {
|
||||
u64 age_stamp;
|
||||
u64 idle_stamp;
|
||||
u64 avg_idle;
|
||||
|
||||
/* This is used to determine avg_idle's max value */
|
||||
u64 max_idle_balance_cost;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
|
||||
@@ -552,6 +560,12 @@ static inline u64 rq_clock_task(struct rq *rq)
|
||||
return rq->clock_task;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NUMA_BALANCING
|
||||
extern void sched_setnuma(struct task_struct *p, int node);
|
||||
extern int migrate_task_to(struct task_struct *p, int cpu);
|
||||
extern int migrate_swap(struct task_struct *, struct task_struct *);
|
||||
#endif /* CONFIG_NUMA_BALANCING */
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
#define rcu_dereference_check_sched_domain(p) \
|
||||
@@ -593,9 +607,24 @@ static inline struct sched_domain *highest_flag_domain(int cpu, int flag)
|
||||
return hsd;
|
||||
}
|
||||
|
||||
static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
|
||||
{
|
||||
struct sched_domain *sd;
|
||||
|
||||
for_each_domain(cpu, sd) {
|
||||
if (sd->flags & flag)
|
||||
break;
|
||||
}
|
||||
|
||||
return sd;
|
||||
}
|
||||
|
||||
DECLARE_PER_CPU(struct sched_domain *, sd_llc);
|
||||
DECLARE_PER_CPU(int, sd_llc_size);
|
||||
DECLARE_PER_CPU(int, sd_llc_id);
|
||||
DECLARE_PER_CPU(struct sched_domain *, sd_numa);
|
||||
DECLARE_PER_CPU(struct sched_domain *, sd_busy);
|
||||
DECLARE_PER_CPU(struct sched_domain *, sd_asym);
|
||||
|
||||
struct sched_group_power {
|
||||
atomic_t ref;
|
||||
@@ -605,6 +634,7 @@ struct sched_group_power {
|
||||
*/
|
||||
unsigned int power, power_orig;
|
||||
unsigned long next_update;
|
||||
int imbalance; /* XXX unrelated to power but shared group state */
|
||||
/*
|
||||
* Number of busy cpus in this group.
|
||||
*/
|
||||
@@ -719,6 +749,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
|
||||
*/
|
||||
smp_wmb();
|
||||
task_thread_info(p)->cpu = cpu;
|
||||
p->wake_cpu = cpu;
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -974,7 +1005,7 @@ struct sched_class {
|
||||
void (*put_prev_task) (struct rq *rq, struct task_struct *p);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
int (*select_task_rq)(struct task_struct *p, int sd_flag, int flags);
|
||||
int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
|
||||
void (*migrate_task_rq)(struct task_struct *p, int next_cpu);
|
||||
|
||||
void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
|
||||
@@ -1220,6 +1251,24 @@ static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
|
||||
lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
|
||||
}
|
||||
|
||||
static inline void double_lock(spinlock_t *l1, spinlock_t *l2)
|
||||
{
|
||||
if (l1 > l2)
|
||||
swap(l1, l2);
|
||||
|
||||
spin_lock(l1);
|
||||
spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
|
||||
}
|
||||
|
||||
static inline void double_raw_lock(raw_spinlock_t *l1, raw_spinlock_t *l2)
|
||||
{
|
||||
if (l1 > l2)
|
||||
swap(l1, l2);
|
||||
|
||||
raw_spin_lock(l1);
|
||||
raw_spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
|
||||
}
|
||||
|
||||
/*
|
||||
* double_rq_lock - safely lock two runqueues
|
||||
*
|
||||
@@ -1305,7 +1354,8 @@ extern void print_rt_stats(struct seq_file *m, int cpu);
|
||||
extern void init_cfs_rq(struct cfs_rq *cfs_rq);
|
||||
extern void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq);
|
||||
|
||||
extern void account_cfs_bandwidth_used(int enabled, int was_enabled);
|
||||
extern void cfs_bandwidth_usage_inc(void);
|
||||
extern void cfs_bandwidth_usage_dec(void);
|
||||
|
||||
#ifdef CONFIG_NO_HZ_COMMON
|
||||
enum rq_nohz_flag_bits {
|
||||
|
@@ -59,9 +59,9 @@ static inline void sched_info_reset_dequeued(struct task_struct *t)
|
||||
* from dequeue_task() to account for possible rq->clock skew across cpus. The
|
||||
* delta taken on each cpu would annul the skew.
|
||||
*/
|
||||
static inline void sched_info_dequeued(struct task_struct *t)
|
||||
static inline void sched_info_dequeued(struct rq *rq, struct task_struct *t)
|
||||
{
|
||||
unsigned long long now = rq_clock(task_rq(t)), delta = 0;
|
||||
unsigned long long now = rq_clock(rq), delta = 0;
|
||||
|
||||
if (unlikely(sched_info_on()))
|
||||
if (t->sched_info.last_queued)
|
||||
@@ -69,7 +69,7 @@ static inline void sched_info_dequeued(struct task_struct *t)
|
||||
sched_info_reset_dequeued(t);
|
||||
t->sched_info.run_delay += delta;
|
||||
|
||||
rq_sched_info_dequeued(task_rq(t), delta);
|
||||
rq_sched_info_dequeued(rq, delta);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -77,9 +77,9 @@ static inline void sched_info_dequeued(struct task_struct *t)
|
||||
* long it was waiting to run. We also note when it began so that we
|
||||
* can keep stats on how long its timeslice is.
|
||||
*/
|
||||
static void sched_info_arrive(struct task_struct *t)
|
||||
static void sched_info_arrive(struct rq *rq, struct task_struct *t)
|
||||
{
|
||||
unsigned long long now = rq_clock(task_rq(t)), delta = 0;
|
||||
unsigned long long now = rq_clock(rq), delta = 0;
|
||||
|
||||
if (t->sched_info.last_queued)
|
||||
delta = now - t->sched_info.last_queued;
|
||||
@@ -88,7 +88,7 @@ static void sched_info_arrive(struct task_struct *t)
|
||||
t->sched_info.last_arrival = now;
|
||||
t->sched_info.pcount++;
|
||||
|
||||
rq_sched_info_arrive(task_rq(t), delta);
|
||||
rq_sched_info_arrive(rq, delta);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -96,11 +96,11 @@ static void sched_info_arrive(struct task_struct *t)
|
||||
* the timestamp if it is already not set. It's assumed that
|
||||
* sched_info_dequeued() will clear that stamp when appropriate.
|
||||
*/
|
||||
static inline void sched_info_queued(struct task_struct *t)
|
||||
static inline void sched_info_queued(struct rq *rq, struct task_struct *t)
|
||||
{
|
||||
if (unlikely(sched_info_on()))
|
||||
if (!t->sched_info.last_queued)
|
||||
t->sched_info.last_queued = rq_clock(task_rq(t));
|
||||
t->sched_info.last_queued = rq_clock(rq);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -111,15 +111,15 @@ static inline void sched_info_queued(struct task_struct *t)
|
||||
* sched_info_queued() to mark that it has now again started waiting on
|
||||
* the runqueue.
|
||||
*/
|
||||
static inline void sched_info_depart(struct task_struct *t)
|
||||
static inline void sched_info_depart(struct rq *rq, struct task_struct *t)
|
||||
{
|
||||
unsigned long long delta = rq_clock(task_rq(t)) -
|
||||
unsigned long long delta = rq_clock(rq) -
|
||||
t->sched_info.last_arrival;
|
||||
|
||||
rq_sched_info_depart(task_rq(t), delta);
|
||||
rq_sched_info_depart(rq, delta);
|
||||
|
||||
if (t->state == TASK_RUNNING)
|
||||
sched_info_queued(t);
|
||||
sched_info_queued(rq, t);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -128,32 +128,34 @@ static inline void sched_info_depart(struct task_struct *t)
|
||||
* the idle task.) We are only called when prev != next.
|
||||
*/
|
||||
static inline void
|
||||
__sched_info_switch(struct task_struct *prev, struct task_struct *next)
|
||||
__sched_info_switch(struct rq *rq,
|
||||
struct task_struct *prev, struct task_struct *next)
|
||||
{
|
||||
struct rq *rq = task_rq(prev);
|
||||
|
||||
/*
|
||||
* prev now departs the cpu. It's not interesting to record
|
||||
* stats about how efficient we were at scheduling the idle
|
||||
* process, however.
|
||||
*/
|
||||
if (prev != rq->idle)
|
||||
sched_info_depart(prev);
|
||||
sched_info_depart(rq, prev);
|
||||
|
||||
if (next != rq->idle)
|
||||
sched_info_arrive(next);
|
||||
sched_info_arrive(rq, next);
|
||||
}
|
||||
static inline void
|
||||
sched_info_switch(struct task_struct *prev, struct task_struct *next)
|
||||
sched_info_switch(struct rq *rq,
|
||||
struct task_struct *prev, struct task_struct *next)
|
||||
{
|
||||
if (unlikely(sched_info_on()))
|
||||
__sched_info_switch(prev, next);
|
||||
__sched_info_switch(rq, prev, next);
|
||||
}
|
||||
#else
|
||||
#define sched_info_queued(t) do { } while (0)
|
||||
#define sched_info_queued(rq, t) do { } while (0)
|
||||
#define sched_info_reset_dequeued(t) do { } while (0)
|
||||
#define sched_info_dequeued(t) do { } while (0)
|
||||
#define sched_info_switch(t, next) do { } while (0)
|
||||
#define sched_info_dequeued(rq, t) do { } while (0)
|
||||
#define sched_info_depart(rq, t) do { } while (0)
|
||||
#define sched_info_arrive(rq, next) do { } while (0)
|
||||
#define sched_info_switch(rq, t, next) do { } while (0)
|
||||
#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
|
||||
|
||||
/*
|
||||
|
@@ -11,7 +11,7 @@
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static int
|
||||
select_task_rq_stop(struct task_struct *p, int sd_flag, int flags)
|
||||
select_task_rq_stop(struct task_struct *p, int cpu, int sd_flag, int flags)
|
||||
{
|
||||
return task_cpu(p); /* stop tasks as never migrate */
|
||||
}
|
||||
|
@@ -52,6 +52,109 @@ void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
|
||||
EXPORT_SYMBOL(remove_wait_queue);
|
||||
|
||||
|
||||
/*
|
||||
* The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just
|
||||
* wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve
|
||||
* number) then we wake all the non-exclusive tasks and one exclusive task.
|
||||
*
|
||||
* There are circumstances in which we can try to wake a task which has already
|
||||
* started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
|
||||
* zero in this (rare) case, and we handle it by continuing to scan the queue.
|
||||
*/
|
||||
static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
|
||||
int nr_exclusive, int wake_flags, void *key)
|
||||
{
|
||||
wait_queue_t *curr, *next;
|
||||
|
||||
list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
|
||||
unsigned flags = curr->flags;
|
||||
|
||||
if (curr->func(curr, mode, wake_flags, key) &&
|
||||
(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* __wake_up - wake up threads blocked on a waitqueue.
|
||||
* @q: the waitqueue
|
||||
* @mode: which threads
|
||||
* @nr_exclusive: how many wake-one or wake-many threads to wake up
|
||||
* @key: is directly passed to the wakeup function
|
||||
*
|
||||
* It may be assumed that this function implies a write memory barrier before
|
||||
* changing the task state if and only if any tasks are woken up.
|
||||
*/
|
||||
void __wake_up(wait_queue_head_t *q, unsigned int mode,
|
||||
int nr_exclusive, void *key)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&q->lock, flags);
|
||||
__wake_up_common(q, mode, nr_exclusive, 0, key);
|
||||
spin_unlock_irqrestore(&q->lock, flags);
|
||||
}
|
||||
EXPORT_SYMBOL(__wake_up);
|
||||
|
||||
/*
|
||||
* Same as __wake_up but called with the spinlock in wait_queue_head_t held.
|
||||
*/
|
||||
void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr)
|
||||
{
|
||||
__wake_up_common(q, mode, nr, 0, NULL);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__wake_up_locked);
|
||||
|
||||
void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
|
||||
{
|
||||
__wake_up_common(q, mode, 1, 0, key);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__wake_up_locked_key);
|
||||
|
||||
/**
|
||||
* __wake_up_sync_key - wake up threads blocked on a waitqueue.
|
||||
* @q: the waitqueue
|
||||
* @mode: which threads
|
||||
* @nr_exclusive: how many wake-one or wake-many threads to wake up
|
||||
* @key: opaque value to be passed to wakeup targets
|
||||
*
|
||||
* The sync wakeup differs that the waker knows that it will schedule
|
||||
* away soon, so while the target thread will be woken up, it will not
|
||||
* be migrated to another CPU - ie. the two threads are 'synchronized'
|
||||
* with each other. This can prevent needless bouncing between CPUs.
|
||||
*
|
||||
* On UP it can prevent extra preemption.
|
||||
*
|
||||
* It may be assumed that this function implies a write memory barrier before
|
||||
* changing the task state if and only if any tasks are woken up.
|
||||
*/
|
||||
void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
|
||||
int nr_exclusive, void *key)
|
||||
{
|
||||
unsigned long flags;
|
||||
int wake_flags = 1; /* XXX WF_SYNC */
|
||||
|
||||
if (unlikely(!q))
|
||||
return;
|
||||
|
||||
if (unlikely(nr_exclusive != 1))
|
||||
wake_flags = 0;
|
||||
|
||||
spin_lock_irqsave(&q->lock, flags);
|
||||
__wake_up_common(q, mode, nr_exclusive, wake_flags, key);
|
||||
spin_unlock_irqrestore(&q->lock, flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__wake_up_sync_key);
|
||||
|
||||
/*
|
||||
* __wake_up_sync - see __wake_up_sync_key()
|
||||
*/
|
||||
void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
|
||||
{
|
||||
__wake_up_sync_key(q, mode, nr_exclusive, NULL);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */
|
||||
|
||||
/*
|
||||
* Note: we use "set_current_state()" _after_ the wait-queue add,
|
||||
* because we need a memory barrier there on SMP, so that any
|
||||
@@ -92,6 +195,30 @@ prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
|
||||
}
|
||||
EXPORT_SYMBOL(prepare_to_wait_exclusive);
|
||||
|
||||
long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
if (signal_pending_state(state, current))
|
||||
return -ERESTARTSYS;
|
||||
|
||||
wait->private = current;
|
||||
wait->func = autoremove_wake_function;
|
||||
|
||||
spin_lock_irqsave(&q->lock, flags);
|
||||
if (list_empty(&wait->task_list)) {
|
||||
if (wait->flags & WQ_FLAG_EXCLUSIVE)
|
||||
__add_wait_queue_tail(q, wait);
|
||||
else
|
||||
__add_wait_queue(q, wait);
|
||||
}
|
||||
set_current_state(state);
|
||||
spin_unlock_irqrestore(&q->lock, flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(prepare_to_wait_event);
|
||||
|
||||
/**
|
||||
* finish_wait - clean up after waiting in a queue
|
||||
* @q: waitqueue waited on
|
@@ -99,13 +99,13 @@ static void __local_bh_disable(unsigned long ip, unsigned int cnt)
|
||||
|
||||
raw_local_irq_save(flags);
|
||||
/*
|
||||
* The preempt tracer hooks into add_preempt_count and will break
|
||||
* The preempt tracer hooks into preempt_count_add and will break
|
||||
* lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
|
||||
* is set and before current->softirq_enabled is cleared.
|
||||
* We must manually increment preempt_count here and manually
|
||||
* call the trace_preempt_off later.
|
||||
*/
|
||||
preempt_count() += cnt;
|
||||
__preempt_count_add(cnt);
|
||||
/*
|
||||
* Were softirqs turned off above:
|
||||
*/
|
||||
@@ -119,7 +119,7 @@ static void __local_bh_disable(unsigned long ip, unsigned int cnt)
|
||||
#else /* !CONFIG_TRACE_IRQFLAGS */
|
||||
static inline void __local_bh_disable(unsigned long ip, unsigned int cnt)
|
||||
{
|
||||
add_preempt_count(cnt);
|
||||
preempt_count_add(cnt);
|
||||
barrier();
|
||||
}
|
||||
#endif /* CONFIG_TRACE_IRQFLAGS */
|
||||
@@ -137,7 +137,7 @@ static void __local_bh_enable(unsigned int cnt)
|
||||
|
||||
if (softirq_count() == cnt)
|
||||
trace_softirqs_on(_RET_IP_);
|
||||
sub_preempt_count(cnt);
|
||||
preempt_count_sub(cnt);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -168,7 +168,7 @@ static inline void _local_bh_enable_ip(unsigned long ip)
|
||||
* Keep preemption disabled until we are done with
|
||||
* softirq processing:
|
||||
*/
|
||||
sub_preempt_count(SOFTIRQ_DISABLE_OFFSET - 1);
|
||||
preempt_count_sub(SOFTIRQ_DISABLE_OFFSET - 1);
|
||||
|
||||
if (unlikely(!in_interrupt() && local_softirq_pending())) {
|
||||
/*
|
||||
@@ -178,7 +178,7 @@ static inline void _local_bh_enable_ip(unsigned long ip)
|
||||
do_softirq();
|
||||
}
|
||||
|
||||
dec_preempt_count();
|
||||
preempt_count_dec();
|
||||
#ifdef CONFIG_TRACE_IRQFLAGS
|
||||
local_irq_enable();
|
||||
#endif
|
||||
@@ -260,7 +260,7 @@ restart:
|
||||
" exited with %08x?\n", vec_nr,
|
||||
softirq_to_name[vec_nr], h->action,
|
||||
prev_count, preempt_count());
|
||||
preempt_count() = prev_count;
|
||||
preempt_count_set(prev_count);
|
||||
}
|
||||
|
||||
rcu_bh_qs(cpu);
|
||||
@@ -378,7 +378,7 @@ void irq_exit(void)
|
||||
|
||||
account_irq_exit_time(current);
|
||||
trace_hardirq_exit();
|
||||
sub_preempt_count(HARDIRQ_OFFSET);
|
||||
preempt_count_sub(HARDIRQ_OFFSET);
|
||||
if (!in_interrupt() && local_softirq_pending())
|
||||
invoke_softirq();
|
||||
|
||||
|
@@ -20,6 +20,7 @@
|
||||
#include <linux/kallsyms.h>
|
||||
#include <linux/smpboot.h>
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/lglock.h>
|
||||
|
||||
/*
|
||||
* Structure to determine completion condition and record errors. May
|
||||
@@ -43,6 +44,14 @@ static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
|
||||
static DEFINE_PER_CPU(struct task_struct *, cpu_stopper_task);
|
||||
static bool stop_machine_initialized = false;
|
||||
|
||||
/*
|
||||
* Avoids a race between stop_two_cpus and global stop_cpus, where
|
||||
* the stoppers could get queued up in reverse order, leading to
|
||||
* system deadlock. Using an lglock means stop_two_cpus remains
|
||||
* relatively cheap.
|
||||
*/
|
||||
DEFINE_STATIC_LGLOCK(stop_cpus_lock);
|
||||
|
||||
static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
|
||||
{
|
||||
memset(done, 0, sizeof(*done));
|
||||
@@ -115,6 +124,184 @@ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
|
||||
return done.executed ? done.ret : -ENOENT;
|
||||
}
|
||||
|
||||
/* This controls the threads on each CPU. */
|
||||
enum multi_stop_state {
|
||||
/* Dummy starting state for thread. */
|
||||
MULTI_STOP_NONE,
|
||||
/* Awaiting everyone to be scheduled. */
|
||||
MULTI_STOP_PREPARE,
|
||||
/* Disable interrupts. */
|
||||
MULTI_STOP_DISABLE_IRQ,
|
||||
/* Run the function */
|
||||
MULTI_STOP_RUN,
|
||||
/* Exit */
|
||||
MULTI_STOP_EXIT,
|
||||
};
|
||||
|
||||
struct multi_stop_data {
|
||||
int (*fn)(void *);
|
||||
void *data;
|
||||
/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
|
||||
unsigned int num_threads;
|
||||
const struct cpumask *active_cpus;
|
||||
|
||||
enum multi_stop_state state;
|
||||
atomic_t thread_ack;
|
||||
};
|
||||
|
||||
static void set_state(struct multi_stop_data *msdata,
|
||||
enum multi_stop_state newstate)
|
||||
{
|
||||
/* Reset ack counter. */
|
||||
atomic_set(&msdata->thread_ack, msdata->num_threads);
|
||||
smp_wmb();
|
||||
msdata->state = newstate;
|
||||
}
|
||||
|
||||
/* Last one to ack a state moves to the next state. */
|
||||
static void ack_state(struct multi_stop_data *msdata)
|
||||
{
|
||||
if (atomic_dec_and_test(&msdata->thread_ack))
|
||||
set_state(msdata, msdata->state + 1);
|
||||
}
|
||||
|
||||
/* This is the cpu_stop function which stops the CPU. */
|
||||
static int multi_cpu_stop(void *data)
|
||||
{
|
||||
struct multi_stop_data *msdata = data;
|
||||
enum multi_stop_state curstate = MULTI_STOP_NONE;
|
||||
int cpu = smp_processor_id(), err = 0;
|
||||
unsigned long flags;
|
||||
bool is_active;
|
||||
|
||||
/*
|
||||
* When called from stop_machine_from_inactive_cpu(), irq might
|
||||
* already be disabled. Save the state and restore it on exit.
|
||||
*/
|
||||
local_save_flags(flags);
|
||||
|
||||
if (!msdata->active_cpus)
|
||||
is_active = cpu == cpumask_first(cpu_online_mask);
|
||||
else
|
||||
is_active = cpumask_test_cpu(cpu, msdata->active_cpus);
|
||||
|
||||
/* Simple state machine */
|
||||
do {
|
||||
/* Chill out and ensure we re-read multi_stop_state. */
|
||||
cpu_relax();
|
||||
if (msdata->state != curstate) {
|
||||
curstate = msdata->state;
|
||||
switch (curstate) {
|
||||
case MULTI_STOP_DISABLE_IRQ:
|
||||
local_irq_disable();
|
||||
hard_irq_disable();
|
||||
break;
|
||||
case MULTI_STOP_RUN:
|
||||
if (is_active)
|
||||
err = msdata->fn(msdata->data);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
ack_state(msdata);
|
||||
}
|
||||
} while (curstate != MULTI_STOP_EXIT);
|
||||
|
||||
local_irq_restore(flags);
|
||||
return err;
|
||||
}
|
||||
|
||||
struct irq_cpu_stop_queue_work_info {
|
||||
int cpu1;
|
||||
int cpu2;
|
||||
struct cpu_stop_work *work1;
|
||||
struct cpu_stop_work *work2;
|
||||
};
|
||||
|
||||
/*
|
||||
* This function is always run with irqs and preemption disabled.
|
||||
* This guarantees that both work1 and work2 get queued, before
|
||||
* our local migrate thread gets the chance to preempt us.
|
||||
*/
|
||||
static void irq_cpu_stop_queue_work(void *arg)
|
||||
{
|
||||
struct irq_cpu_stop_queue_work_info *info = arg;
|
||||
cpu_stop_queue_work(info->cpu1, info->work1);
|
||||
cpu_stop_queue_work(info->cpu2, info->work2);
|
||||
}
|
||||
|
||||
/**
|
||||
* stop_two_cpus - stops two cpus
|
||||
* @cpu1: the cpu to stop
|
||||
* @cpu2: the other cpu to stop
|
||||
* @fn: function to execute
|
||||
* @arg: argument to @fn
|
||||
*
|
||||
* Stops both the current and specified CPU and runs @fn on one of them.
|
||||
*
|
||||
* returns when both are completed.
|
||||
*/
|
||||
int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg)
|
||||
{
|
||||
struct cpu_stop_done done;
|
||||
struct cpu_stop_work work1, work2;
|
||||
struct irq_cpu_stop_queue_work_info call_args;
|
||||
struct multi_stop_data msdata;
|
||||
|
||||
preempt_disable();
|
||||
msdata = (struct multi_stop_data){
|
||||
.fn = fn,
|
||||
.data = arg,
|
||||
.num_threads = 2,
|
||||
.active_cpus = cpumask_of(cpu1),
|
||||
};
|
||||
|
||||
work1 = work2 = (struct cpu_stop_work){
|
||||
.fn = multi_cpu_stop,
|
||||
.arg = &msdata,
|
||||
.done = &done
|
||||
};
|
||||
|
||||
call_args = (struct irq_cpu_stop_queue_work_info){
|
||||
.cpu1 = cpu1,
|
||||
.cpu2 = cpu2,
|
||||
.work1 = &work1,
|
||||
.work2 = &work2,
|
||||
};
|
||||
|
||||
cpu_stop_init_done(&done, 2);
|
||||
set_state(&msdata, MULTI_STOP_PREPARE);
|
||||
|
||||
/*
|
||||
* If we observe both CPUs active we know _cpu_down() cannot yet have
|
||||
* queued its stop_machine works and therefore ours will get executed
|
||||
* first. Or its not either one of our CPUs that's getting unplugged,
|
||||
* in which case we don't care.
|
||||
*
|
||||
* This relies on the stopper workqueues to be FIFO.
|
||||
*/
|
||||
if (!cpu_active(cpu1) || !cpu_active(cpu2)) {
|
||||
preempt_enable();
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
lg_local_lock(&stop_cpus_lock);
|
||||
/*
|
||||
* Queuing needs to be done by the lowest numbered CPU, to ensure
|
||||
* that works are always queued in the same order on every CPU.
|
||||
* This prevents deadlocks.
|
||||
*/
|
||||
smp_call_function_single(min(cpu1, cpu2),
|
||||
&irq_cpu_stop_queue_work,
|
||||
&call_args, 0);
|
||||
lg_local_unlock(&stop_cpus_lock);
|
||||
preempt_enable();
|
||||
|
||||
wait_for_completion(&done.completion);
|
||||
|
||||
return done.executed ? done.ret : -ENOENT;
|
||||
}
|
||||
|
||||
/**
|
||||
* stop_one_cpu_nowait - stop a cpu but don't wait for completion
|
||||
* @cpu: cpu to stop
|
||||
@@ -159,10 +346,10 @@ static void queue_stop_cpus_work(const struct cpumask *cpumask,
|
||||
* preempted by a stopper which might wait for other stoppers
|
||||
* to enter @fn which can lead to deadlock.
|
||||
*/
|
||||
preempt_disable();
|
||||
lg_global_lock(&stop_cpus_lock);
|
||||
for_each_cpu(cpu, cpumask)
|
||||
cpu_stop_queue_work(cpu, &per_cpu(stop_cpus_work, cpu));
|
||||
preempt_enable();
|
||||
lg_global_unlock(&stop_cpus_lock);
|
||||
}
|
||||
|
||||
static int __stop_cpus(const struct cpumask *cpumask,
|
||||
@@ -359,98 +546,14 @@ early_initcall(cpu_stop_init);
|
||||
|
||||
#ifdef CONFIG_STOP_MACHINE
|
||||
|
||||
/* This controls the threads on each CPU. */
|
||||
enum stopmachine_state {
|
||||
/* Dummy starting state for thread. */
|
||||
STOPMACHINE_NONE,
|
||||
/* Awaiting everyone to be scheduled. */
|
||||
STOPMACHINE_PREPARE,
|
||||
/* Disable interrupts. */
|
||||
STOPMACHINE_DISABLE_IRQ,
|
||||
/* Run the function */
|
||||
STOPMACHINE_RUN,
|
||||
/* Exit */
|
||||
STOPMACHINE_EXIT,
|
||||
};
|
||||
|
||||
struct stop_machine_data {
|
||||
int (*fn)(void *);
|
||||
void *data;
|
||||
/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
|
||||
unsigned int num_threads;
|
||||
const struct cpumask *active_cpus;
|
||||
|
||||
enum stopmachine_state state;
|
||||
atomic_t thread_ack;
|
||||
};
|
||||
|
||||
static void set_state(struct stop_machine_data *smdata,
|
||||
enum stopmachine_state newstate)
|
||||
{
|
||||
/* Reset ack counter. */
|
||||
atomic_set(&smdata->thread_ack, smdata->num_threads);
|
||||
smp_wmb();
|
||||
smdata->state = newstate;
|
||||
}
|
||||
|
||||
/* Last one to ack a state moves to the next state. */
|
||||
static void ack_state(struct stop_machine_data *smdata)
|
||||
{
|
||||
if (atomic_dec_and_test(&smdata->thread_ack))
|
||||
set_state(smdata, smdata->state + 1);
|
||||
}
|
||||
|
||||
/* This is the cpu_stop function which stops the CPU. */
|
||||
static int stop_machine_cpu_stop(void *data)
|
||||
{
|
||||
struct stop_machine_data *smdata = data;
|
||||
enum stopmachine_state curstate = STOPMACHINE_NONE;
|
||||
int cpu = smp_processor_id(), err = 0;
|
||||
unsigned long flags;
|
||||
bool is_active;
|
||||
|
||||
/*
|
||||
* When called from stop_machine_from_inactive_cpu(), irq might
|
||||
* already be disabled. Save the state and restore it on exit.
|
||||
*/
|
||||
local_save_flags(flags);
|
||||
|
||||
if (!smdata->active_cpus)
|
||||
is_active = cpu == cpumask_first(cpu_online_mask);
|
||||
else
|
||||
is_active = cpumask_test_cpu(cpu, smdata->active_cpus);
|
||||
|
||||
/* Simple state machine */
|
||||
do {
|
||||
/* Chill out and ensure we re-read stopmachine_state. */
|
||||
cpu_relax();
|
||||
if (smdata->state != curstate) {
|
||||
curstate = smdata->state;
|
||||
switch (curstate) {
|
||||
case STOPMACHINE_DISABLE_IRQ:
|
||||
local_irq_disable();
|
||||
hard_irq_disable();
|
||||
break;
|
||||
case STOPMACHINE_RUN:
|
||||
if (is_active)
|
||||
err = smdata->fn(smdata->data);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
ack_state(smdata);
|
||||
}
|
||||
} while (curstate != STOPMACHINE_EXIT);
|
||||
|
||||
local_irq_restore(flags);
|
||||
return err;
|
||||
}
|
||||
|
||||
int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
|
||||
{
|
||||
struct stop_machine_data smdata = { .fn = fn, .data = data,
|
||||
.num_threads = num_online_cpus(),
|
||||
.active_cpus = cpus };
|
||||
struct multi_stop_data msdata = {
|
||||
.fn = fn,
|
||||
.data = data,
|
||||
.num_threads = num_online_cpus(),
|
||||
.active_cpus = cpus,
|
||||
};
|
||||
|
||||
if (!stop_machine_initialized) {
|
||||
/*
|
||||
@@ -461,7 +564,7 @@ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
|
||||
unsigned long flags;
|
||||
int ret;
|
||||
|
||||
WARN_ON_ONCE(smdata.num_threads != 1);
|
||||
WARN_ON_ONCE(msdata.num_threads != 1);
|
||||
|
||||
local_irq_save(flags);
|
||||
hard_irq_disable();
|
||||
@@ -472,8 +575,8 @@ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
|
||||
}
|
||||
|
||||
/* Set the initial state and stop all online cpus. */
|
||||
set_state(&smdata, STOPMACHINE_PREPARE);
|
||||
return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata);
|
||||
set_state(&msdata, MULTI_STOP_PREPARE);
|
||||
return stop_cpus(cpu_online_mask, multi_cpu_stop, &msdata);
|
||||
}
|
||||
|
||||
int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
|
||||
@@ -513,25 +616,25 @@ EXPORT_SYMBOL_GPL(stop_machine);
|
||||
int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data,
|
||||
const struct cpumask *cpus)
|
||||
{
|
||||
struct stop_machine_data smdata = { .fn = fn, .data = data,
|
||||
struct multi_stop_data msdata = { .fn = fn, .data = data,
|
||||
.active_cpus = cpus };
|
||||
struct cpu_stop_done done;
|
||||
int ret;
|
||||
|
||||
/* Local CPU must be inactive and CPU hotplug in progress. */
|
||||
BUG_ON(cpu_active(raw_smp_processor_id()));
|
||||
smdata.num_threads = num_active_cpus() + 1; /* +1 for local */
|
||||
msdata.num_threads = num_active_cpus() + 1; /* +1 for local */
|
||||
|
||||
/* No proper task established and can't sleep - busy wait for lock. */
|
||||
while (!mutex_trylock(&stop_cpus_mutex))
|
||||
cpu_relax();
|
||||
|
||||
/* Schedule work on other CPUs and execute directly for local CPU */
|
||||
set_state(&smdata, STOPMACHINE_PREPARE);
|
||||
set_state(&msdata, MULTI_STOP_PREPARE);
|
||||
cpu_stop_init_done(&done, num_active_cpus());
|
||||
queue_stop_cpus_work(cpu_active_mask, stop_machine_cpu_stop, &smdata,
|
||||
queue_stop_cpus_work(cpu_active_mask, multi_cpu_stop, &msdata,
|
||||
&done);
|
||||
ret = stop_machine_cpu_stop(&smdata);
|
||||
ret = multi_cpu_stop(&msdata);
|
||||
|
||||
/* Busy wait for completion. */
|
||||
while (!completion_done(&done.completion))
|
||||
|
@@ -370,13 +370,6 @@ static struct ctl_table kern_table[] = {
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
{
|
||||
.procname = "numa_balancing_scan_period_reset",
|
||||
.data = &sysctl_numa_balancing_scan_period_reset,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
{
|
||||
.procname = "numa_balancing_scan_period_max_ms",
|
||||
.data = &sysctl_numa_balancing_scan_period_max,
|
||||
@@ -391,6 +384,20 @@ static struct ctl_table kern_table[] = {
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
{
|
||||
.procname = "numa_balancing_settle_count",
|
||||
.data = &sysctl_numa_balancing_settle_count,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
{
|
||||
.procname = "numa_balancing_migrate_deferred",
|
||||
.data = &sysctl_numa_balancing_migrate_deferred,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
#endif /* CONFIG_NUMA_BALANCING */
|
||||
#endif /* CONFIG_SCHED_DEBUG */
|
||||
{
|
||||
|
@@ -1092,7 +1092,7 @@ static int cascade(struct tvec_base *base, struct tvec *tv, int index)
|
||||
static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
|
||||
unsigned long data)
|
||||
{
|
||||
int preempt_count = preempt_count();
|
||||
int count = preempt_count();
|
||||
|
||||
#ifdef CONFIG_LOCKDEP
|
||||
/*
|
||||
@@ -1119,16 +1119,16 @@ static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
|
||||
|
||||
lock_map_release(&lockdep_map);
|
||||
|
||||
if (preempt_count != preempt_count()) {
|
||||
if (count != preempt_count()) {
|
||||
WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n",
|
||||
fn, preempt_count, preempt_count());
|
||||
fn, count, preempt_count());
|
||||
/*
|
||||
* Restore the preempt count. That gives us a decent
|
||||
* chance to survive and extract information. If the
|
||||
* callback kept a lock held, bad luck, but not worse
|
||||
* than the BUG() we had.
|
||||
*/
|
||||
preempt_count() = preempt_count;
|
||||
preempt_count_set(count);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -1509,7 +1509,8 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
|
||||
#endif
|
||||
((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
|
||||
((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
|
||||
(need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
|
||||
(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
|
||||
(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
|
||||
|
||||
|
@@ -124,6 +124,7 @@ enum trace_flag_type {
|
||||
TRACE_FLAG_NEED_RESCHED = 0x04,
|
||||
TRACE_FLAG_HARDIRQ = 0x08,
|
||||
TRACE_FLAG_SOFTIRQ = 0x10,
|
||||
TRACE_FLAG_PREEMPT_RESCHED = 0x20,
|
||||
};
|
||||
|
||||
#define TRACE_BUF_SIZE 1024
|
||||
|
@@ -618,8 +618,23 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
|
||||
(entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
|
||||
(entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' :
|
||||
'.';
|
||||
need_resched =
|
||||
(entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.';
|
||||
|
||||
switch (entry->flags & (TRACE_FLAG_NEED_RESCHED |
|
||||
TRACE_FLAG_PREEMPT_RESCHED)) {
|
||||
case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_PREEMPT_RESCHED:
|
||||
need_resched = 'N';
|
||||
break;
|
||||
case TRACE_FLAG_NEED_RESCHED:
|
||||
need_resched = 'n';
|
||||
break;
|
||||
case TRACE_FLAG_PREEMPT_RESCHED:
|
||||
need_resched = 'p';
|
||||
break;
|
||||
default:
|
||||
need_resched = '.';
|
||||
break;
|
||||
}
|
||||
|
||||
hardsoft_irq =
|
||||
(hardirq && softirq) ? 'H' :
|
||||
hardirq ? 'h' :
|
||||
|
Reference in New Issue
Block a user