@@ -1492,6 +1492,21 @@ struct task_numa_env {
 static void task_numa_assign(struct task_numa_env *env,
 			     struct task_struct *p, long imp)
 {
+	struct rq *rq = cpu_rq(env->dst_cpu);
+
+	/* Bail out if run-queue part of active NUMA balance. */
+	if (xchg(&rq->numa_migrate_on, 1))
+		return;
+
+	/*
+	 * Clear previous best_cpu/rq numa-migrate flag, since task now
+	 * found a better CPU to move/swap.
+	 */
+	if (env->best_cpu != -1) {
+		rq = cpu_rq(env->best_cpu);
+		WRITE_ONCE(rq->numa_migrate_on, 0);
+	}
+
 	if (env->best_task)
 		put_task_struct(env->best_task);
 	if (p)
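
The prologue added to task_numa_assign() serializes CPU selection: xchg() atomically sets rq->numa_migrate_on on the destination run queue and returns the old value, so only the first task to pick a CPU keeps the claim, and the claim on any previously chosen best_cpu is dropped with WRITE_ONCE(). Below is a minimal user-space sketch of the same claim/release pattern; it uses compiler __atomic builtins in place of the kernel's xchg()/WRITE_ONCE(), and rq_stub, claim_rq() and release_rq() are illustrative names, not kernel API.

/* Minimal sketch of the numa_migrate_on claim/release idea (illustrative). */
#include <stdio.h>

struct rq_stub {
	int numa_migrate_on;	/* 1 while some task has claimed this run queue */
};

/* Returns 1 if the claim succeeded, 0 if another task already holds it. */
static int claim_rq(struct rq_stub *rq)
{
	/* Same shape as: if (xchg(&rq->numa_migrate_on, 1)) return; */
	return __atomic_exchange_n(&rq->numa_migrate_on, 1, __ATOMIC_SEQ_CST) == 0;
}

static void release_rq(struct rq_stub *rq)
{
	/* Same shape as: WRITE_ONCE(rq->numa_migrate_on, 0); */
	__atomic_store_n(&rq->numa_migrate_on, 0, __ATOMIC_RELEASE);
}

int main(void)
{
	struct rq_stub rq = { 0 };

	printf("first claim:   %d\n", claim_rq(&rq));	/* 1: we won the race */
	printf("second claim:  %d\n", claim_rq(&rq));	/* 0: already claimed, bail out */
	release_rq(&rq);
	printf("after release: %d\n", claim_rq(&rq));	/* 1: claimable again */
	return 0;
}

The second claim printing 0 corresponds to the early return in the hunk above: a run queue that is already part of an active NUMA balance is simply skipped.
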
@@ -1530,6 +1545,13 @@ static bool load_too_imbalanced(long src_load, long dst_load,
 	return (imb > old_imb);
 }
 
+/*
+ * Maximum NUMA importance can be 1998 (2*999);
+ * SMALLIMP @ 30 would be close to 1998/64.
+ * Used to deter task migration.
+ */
+#define SMALLIMP	30
+
 /*
  * This checks if the overall compute and NUMA accesses of the system would
  * be improved if the source tasks was migrated to the target dst_cpu taking
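
For the arithmetic in the new comment: the importance value sums a task component and a group component, each of which the comment bounds at 999, so it tops out at 2*999 = 1998; 1998/64 is 31 in integer terms, and SMALLIMP sits just below that at 30. The constant is applied a few hunks below to filter out marginal migrations (see the sketch after that hunk).
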
@@ -1547,6 +1569,9 @@ static void task_numa_compare(struct task_numa_env *env,
 	long moveimp = imp;
 	int dist = env->dist;
 
+	if (READ_ONCE(dst_rq->numa_migrate_on))
+		return;
+
 	rcu_read_lock();
 	cur = task_rcu_dereference(&dst_rq->curr);
 	if (cur && ((cur->flags & PF_EXITING) || is_idle_task(cur)))
@@ -1560,7 +1585,7 @@ static void task_numa_compare(struct task_numa_env *env,
 		goto unlock;
 
 	if (!cur) {
-		if (maymove || imp > env->best_imp)
+		if (maymove && moveimp >= env->best_imp)
 			goto assign;
 		else
 			goto unlock;
@@ -1603,15 +1628,21 @@ static void task_numa_compare(struct task_numa_env *env,
 			       task_weight(cur, env->dst_nid, dist);
 	}
 
-	if (imp <= env->best_imp)
-		goto unlock;
-
 	if (maymove && moveimp > imp && moveimp > env->best_imp) {
-		imp = moveimp - 1;
+		imp = moveimp;
 		cur = NULL;
 		goto assign;
 	}
 
+	/*
+	 * If the NUMA importance is less than SMALLIMP,
+	 * task migration might only result in ping pong
+	 * of tasks and also hurt performance due to cache
+	 * misses.
+	 */
+	if (imp < SMALLIMP || imp <= env->best_imp + SMALLIMP / 2)
+		goto unlock;
+
 	/*
 	 * In the overloaded case, try and keep the load balanced.
 	 */
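
Two filters now guard the comparison: an absolute one (imp < SMALLIMP) and a relative one (imp must exceed the current best by more than SMALLIMP/2), and the hunk drops the earlier plain imp <= env->best_imp check that they subsume. The stand-alone sketch below restates just that filter; improvement_worthwhile() and its arguments are illustrative names, not the kernel's.

/* Illustrative restatement of the SMALLIMP filter in task_numa_compare(). */
#include <stdio.h>
#include <stdbool.h>

#define SMALLIMP	30

/* Return true when a candidate improvement is worth acting on. */
static bool improvement_worthwhile(long imp, long best_imp)
{
	if (imp < SMALLIMP)			/* absolute gain too small */
		return false;
	if (imp <= best_imp + SMALLIMP / 2)	/* not clearly better than current best */
		return false;
	return true;
}

int main(void)
{
	printf("%d\n", improvement_worthwhile(25, 0));	/* 0: below SMALLIMP */
	printf("%d\n", improvement_worthwhile(40, 30));	/* 0: only 10 better than best */
	printf("%d\n", improvement_worthwhile(60, 30));	/* 1: clear improvement */
	return 0;
}

With SMALLIMP at 30, a candidate that beats the current best by only a handful of points is rejected, which is what keeps tasks from ping-ponging between nodes for marginal gains.
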
@@ -1688,6 +1719,7 @@ static int task_numa_migrate(struct task_struct *p)
 		.best_cpu = -1,
 	};
 	struct sched_domain *sd;
+	struct rq *best_rq;
 	unsigned long taskweight, groupweight;
 	int nid, ret, dist;
 	long taskimp, groupimp;
@@ -1783,20 +1815,17 @@ static int task_numa_migrate(struct task_struct *p)
 	if (env.best_cpu == -1)
 		return -EAGAIN;
 
-	/*
-	 * Reset the scan period if the task is being rescheduled on an
-	 * alternative node to recheck if the tasks is now properly placed.
-	 */
-	p->numa_scan_period = task_scan_start(p);
-
+	best_rq = cpu_rq(env.best_cpu);
 	if (env.best_task == NULL) {
 		ret = migrate_task_to(p, env.best_cpu);
+		WRITE_ONCE(best_rq->numa_migrate_on, 0);
 		if (ret != 0)
 			trace_sched_stick_numa(p, env.src_cpu, env.best_cpu);
 		return ret;
 	}
 
 	ret = migrate_swap(p, env.best_task, env.best_cpu, env.src_cpu);
+	WRITE_ONCE(best_rq->numa_migrate_on, 0);
 
 	if (ret != 0)
 		trace_sched_stick_numa(p, env.src_cpu, task_cpu(env.best_task));
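
Two things change in task_numa_migrate() here: the unconditional scan-period reset (and its comment) moves out of this function and into the update_scan_period() helper introduced further down, and the numa_migrate_on claim taken when the CPU was selected is now released on the chosen best_rq right after the migration attempt, on both the move and the swap path. A minimal sketch of that release discipline, with stand-in names and stubbed migration calls (not the kernel's functions):

/* Sketch: the claim on the chosen run queue is dropped after the attempt,
 * whether the task is moved or swapped and whether or not it succeeded.
 */
#include <stdio.h>

struct rq_stub {
	int numa_migrate_on;
};

/* Stand-ins for migrate_task_to()/migrate_swap(); 0 means success. */
static int move_task(struct rq_stub *rq)	{ (void)rq; return 0; }
static int swap_tasks(struct rq_stub *rq)	{ (void)rq; return 0; }

static int finish_numa_migrate(struct rq_stub *best_rq, int have_best_task)
{
	int ret;

	if (!have_best_task) {
		ret = move_task(best_rq);
		__atomic_store_n(&best_rq->numa_migrate_on, 0, __ATOMIC_RELEASE);
		return ret;
	}

	ret = swap_tasks(best_rq);
	__atomic_store_n(&best_rq->numa_migrate_on, 0, __ATOMIC_RELEASE);
	return ret;
}

int main(void)
{
	struct rq_stub rq = { 1 };	/* claimed earlier, as in task_numa_assign() */

	finish_numa_migrate(&rq, 0);
	printf("after move path: %d\n", rq.numa_migrate_on);	/* 0 */

	rq.numa_migrate_on = 1;
	finish_numa_migrate(&rq, 1);
	printf("after swap path: %d\n", rq.numa_migrate_on);	/* 0 */
	return 0;
}
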
@@ -2574,6 +2603,39 @@ void task_tick_numa(struct rq *rq, struct task_struct *curr)
 	}
 }
 
+static void update_scan_period(struct task_struct *p, int new_cpu)
+{
+	int src_nid = cpu_to_node(task_cpu(p));
+	int dst_nid = cpu_to_node(new_cpu);
+
+	if (!static_branch_likely(&sched_numa_balancing))
+		return;
+
+	if (!p->mm || !p->numa_faults || (p->flags & PF_EXITING))
+		return;
+
+	if (src_nid == dst_nid)
+		return;
+
+	/*
+	 * Allow resets if faults have been trapped before one scan
+	 * has completed. This is most likely due to a new task that
+	 * is pulled cross-node due to wakeups or load balancing.
+	 */
+	if (p->numa_scan_seq) {
+		/*
+		 * Avoid scan adjustments if moving to the preferred
+		 * node or if the task was not previously running on
+		 * the preferred node.
+		 */
+		if (dst_nid == p->numa_preferred_nid ||
+		    (p->numa_preferred_nid != -1 && src_nid != p->numa_preferred_nid))
+			return;
+	}
+
+	p->numa_scan_period = task_scan_start(p);
+}
+
 #else
 static void task_tick_numa(struct rq *rq, struct task_struct *curr)
 {
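
update_scan_period() concentrates the policy for resetting the NUMA scan rate on a cross-node move: always reset before the first scan has completed, and afterwards only when the task is being pulled off its preferred node. The sketch below restates that decision outside the kernel, omitting the sched_numa_balancing, p->mm, p->numa_faults and PF_EXITING enablement checks; task_stub and should_reset_scan() are illustrative names.

/* Simplified restatement of the update_scan_period() decision. */
#include <stdio.h>
#include <stdbool.h>

struct task_stub {
	int numa_scan_seq;	/* 0 until the first scan completes */
	int preferred_nid;	/* -1 while no preferred node is known */
};

/* Should the scan period be reset when moving from src_nid to dst_nid? */
static bool should_reset_scan(const struct task_stub *p, int src_nid, int dst_nid)
{
	if (src_nid == dst_nid)
		return false;		/* same node: placement unchanged */

	if (p->numa_scan_seq) {
		/* After the first scan, skip resets when moving to the preferred
		 * node, or when the task was not on its preferred node anyway. */
		if (dst_nid == p->preferred_nid ||
		    (p->preferred_nid != -1 && src_nid != p->preferred_nid))
			return false;
	}

	/* New task, or task pulled off its preferred node: rescan quickly. */
	return true;
}

int main(void)
{
	struct task_stub new_task = { .numa_scan_seq = 0, .preferred_nid = -1 };
	struct task_stub settled  = { .numa_scan_seq = 3, .preferred_nid = 0 };

	printf("%d\n", should_reset_scan(&new_task, 0, 1));	/* 1: before first scan */
	printf("%d\n", should_reset_scan(&settled, 1, 0));	/* 0: moving to preferred node */
	printf("%d\n", should_reset_scan(&settled, 0, 1));	/* 1: pulled off preferred node */
	return 0;
}

The hunks that follow give migrate_task_rq_fair() a new_cpu argument precisely so it can call this helper at migration time.
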
@@ -2587,6 +2649,10 @@ static inline void account_numa_dequeue(struct rq *rq, struct task_struct *p)
 {
 }
 
+static inline void update_scan_period(struct task_struct *p, int new_cpu)
+{
+}
+
 #endif /* CONFIG_NUMA_BALANCING */
 
 static void
@@ -6280,7 +6346,7 @@ static void detach_entity_cfs_rq(struct sched_entity *se);
  * cfs_rq_of(p) references at time of call are still valid and identify the
  * previous CPU. The caller guarantees p->pi_lock or task_rq(p)->lock is held.
  */
-static void migrate_task_rq_fair(struct task_struct *p)
+static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
 {
 	/*
 	 * As blocked tasks retain absolute vruntime the migration needs to
@@ -6333,6 +6399,8 @@ static void migrate_task_rq_fair(struct task_struct *p)
 
 	/* We have migrated, no longer consider this task hot */
 	p->se.exec_start = 0;
+
+	update_scan_period(p, new_cpu);
 }
 
 static void task_dead_fair(struct task_struct *p)