sched/numa: Find an alternative idle CPU if the CPU is part of an active NUMA balance
Multiple tasks can attempt to select an idle CPU but fail because numa_migrate_on is already set and the migration fails. Instead of failing, scan for an alternative idle CPU. select_idle_sibling() is not used because it requires IRQs to be disabled and it ignores numa_migrate_on, allowing multiple tasks to stack. This scan may still fail even if there are idle candidate CPUs, due to races, but if this occurs, it's best that a task stay on an available CPU than move to a contended one. Signed-off-by: Mel Gorman <mgorman@techsingularity.net> Signed-off-by: Ingo Molnar <mingo@kernel.org> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Vincent Guittot <vincent.guittot@linaro.org> Cc: Juri Lelli <juri.lelli@redhat.com> Cc: Dietmar Eggemann <dietmar.eggemann@arm.com> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Valentin Schneider <valentin.schneider@arm.com> Cc: Phil Auld <pauld@redhat.com> Cc: Hillf Danton <hdanton@sina.com> Link: https://lore.kernel.org/r/20200224095223.13361-12-mgorman@techsingularity.net
This commit is contained in:
@@ -1624,15 +1624,34 @@ static void task_numa_assign(struct task_numa_env *env,
|
|||||||
{
|
{
|
||||||
struct rq *rq = cpu_rq(env->dst_cpu);
|
struct rq *rq = cpu_rq(env->dst_cpu);
|
||||||
|
|
||||||
/* Bail out if run-queue part of active NUMA balance. */
|
/* Check if run-queue part of active NUMA balance. */
|
||||||
if (env->best_cpu != env->dst_cpu && xchg(&rq->numa_migrate_on, 1))
|
if (env->best_cpu != env->dst_cpu && xchg(&rq->numa_migrate_on, 1)) {
|
||||||
return;
|
int cpu;
|
||||||
|
int start = env->dst_cpu;
|
||||||
|
|
||||||
|
/* Find alternative idle CPU. */
|
||||||
|
for_each_cpu_wrap(cpu, cpumask_of_node(env->dst_nid), start) {
|
||||||
|
if (cpu == env->best_cpu || !idle_cpu(cpu) ||
|
||||||
|
!cpumask_test_cpu(cpu, env->p->cpus_ptr)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
env->dst_cpu = cpu;
|
||||||
|
rq = cpu_rq(env->dst_cpu);
|
||||||
|
if (!xchg(&rq->numa_migrate_on, 1))
|
||||||
|
goto assign;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Failed to find an alternative idle CPU */
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
assign:
|
||||||
/*
|
/*
|
||||||
* Clear previous best_cpu/rq numa-migrate flag, since task now
|
* Clear previous best_cpu/rq numa-migrate flag, since task now
|
||||||
* found a better CPU to move/swap.
|
* found a better CPU to move/swap.
|
||||||
*/
|
*/
|
||||||
if (env->best_cpu != -1) {
|
if (env->best_cpu != -1 && env->best_cpu != env->dst_cpu) {
|
||||||
rq = cpu_rq(env->best_cpu);
|
rq = cpu_rq(env->best_cpu);
|
||||||
WRITE_ONCE(rq->numa_migrate_on, 0);
|
WRITE_ONCE(rq->numa_migrate_on, 0);
|
||||||
}
|
}
|
||||||
@@ -1806,21 +1825,6 @@ assign:
|
|||||||
cpu = env->best_cpu;
|
cpu = env->best_cpu;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Use select_idle_sibling if the previously found idle CPU is
|
|
||||||
* not idle any more.
|
|
||||||
*/
|
|
||||||
if (!idle_cpu(cpu)) {
|
|
||||||
/*
|
|
||||||
* select_idle_siblings() uses an per-CPU cpumask that
|
|
||||||
* can be used from IRQ context.
|
|
||||||
*/
|
|
||||||
local_irq_disable();
|
|
||||||
cpu = select_idle_sibling(env->p, env->src_cpu,
|
|
||||||
env->dst_cpu);
|
|
||||||
local_irq_enable();
|
|
||||||
}
|
|
||||||
|
|
||||||
env->dst_cpu = cpu;
|
env->dst_cpu = cpu;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user