sched/pelt: Add a new runnable average signal
Now that runnable_load_avg has been removed, we can replace it with a new signal that will highlight the runnable pressure on a cfs_rq. This signal tracks the waiting time of tasks on the rq and can help to better define the state of rqs. Currently, only util_avg is used to define the state of a rq: A rq with more than around 80% of utilization and more than one task is considered overloaded. But the util_avg signal of a rq can become temporarily low after a task has migrated onto another rq, which can bias the classification of the rq. When tasks compete for the same rq, their runnable average signal will be higher than util_avg as it will include the waiting time, and we can use this signal to better classify cfs_rqs. The new runnable_avg will track the runnable time of a task, which simply adds the waiting time to the running time. The runnable_avg of a cfs_rq will be the sum of its se's runnable_avg, and the runnable_avg of a group entity will follow the one of the rq, similarly to util_avg. Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org> Signed-off-by: Mel Gorman <mgorman@techsingularity.net> Signed-off-by: Ingo Molnar <mingo@kernel.org> Reviewed-by: "Dietmar Eggemann <dietmar.eggemann@arm.com>" Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Juri Lelli <juri.lelli@redhat.com> Cc: Valentin Schneider <valentin.schneider@arm.com> Cc: Phil Auld <pauld@redhat.com> Cc: Hillf Danton <hdanton@sina.com> Link: https://lore.kernel.org/r/20200224095223.13361-9-mgorman@techsingularity.net
This commit is contained in:

committed by
Ingo Molnar

parent
0dacee1bfa
commit
9f68395333
@@ -794,6 +794,8 @@ void post_init_entity_util_avg(struct task_struct *p)
|
||||
}
|
||||
}
|
||||
|
||||
sa->runnable_avg = cpu_scale;
|
||||
|
||||
if (p->sched_class != &fair_sched_class) {
|
||||
/*
|
||||
* For !fair tasks do:
|
||||
@@ -3215,9 +3217,9 @@ void set_task_rq_fair(struct sched_entity *se,
|
||||
* _IFF_ we look at the pure running and runnable sums. Because they
|
||||
* represent the very same entity, just at different points in the hierarchy.
|
||||
*
|
||||
* Per the above update_tg_cfs_util() is trivial * and simply copies the
|
||||
* running sum over (but still wrong, because the group entity and group rq do
|
||||
* not have their PELT windows aligned).
|
||||
* Per the above update_tg_cfs_util() and update_tg_cfs_runnable() are trivial
|
||||
* and simply copy the running/runnable sum over (but still wrong, because
|
||||
* the group entity and group rq do not have their PELT windows aligned).
|
||||
*
|
||||
* However, update_tg_cfs_load() is more complex. So we have:
|
||||
*
|
||||
@@ -3299,6 +3301,32 @@ update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
|
||||
cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * LOAD_AVG_MAX;
|
||||
}
|
||||
|
||||
/*
 * update_tg_cfs_runnable - copy the group cfs_rq's runnable_avg onto its
 * group entity and apply the resulting change to the parent cfs_rq.
 *
 * @cfs_rq:  cfs_rq whose avg.runnable_avg/runnable_sum absorb the delta
 * @se:      entity whose avg.runnable_avg/runnable_sum are overwritten
 * @gcfs_rq: cfs_rq whose avg.runnable_avg becomes the new value for @se
 *
 * Returns early when @gcfs_rq and @se already carry the same runnable_avg.
 * Both runnable_sum fields are recomputed as runnable_avg * LOAD_AVG_MAX
 * instead of being adjusted incrementally.
 */
static inline void
|
||||
update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
|
||||
{
|
||||
long delta = gcfs_rq->avg.runnable_avg - se->avg.runnable_avg;
|
||||
|
||||
/* Nothing to update */
|
||||
if (!delta)
|
||||
return;
|
||||
|
||||
/*
|
||||
* The relation between sum and avg is:
|
||||
*
|
||||
* LOAD_AVG_MAX - 1024 + sa->period_contrib
|
||||
*
|
||||
* however, the PELT windows are not aligned between grq and gse.
|
||||
*/
|
||||
|
||||
/* Set new sched_entity's runnable */
|
||||
se->avg.runnable_avg = gcfs_rq->avg.runnable_avg;
|
||||
se->avg.runnable_sum = se->avg.runnable_avg * LOAD_AVG_MAX;
|
||||
|
||||
/* Update parent cfs_rq runnable */
|
||||
add_positive(&cfs_rq->avg.runnable_avg, delta);
|
||||
cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * LOAD_AVG_MAX;
|
||||
}
|
||||
|
||||
static inline void
|
||||
update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
|
||||
{
|
||||
@@ -3379,6 +3407,7 @@ static inline int propagate_entity_load_avg(struct sched_entity *se)
|
||||
add_tg_cfs_propagate(cfs_rq, gcfs_rq->prop_runnable_sum);
|
||||
|
||||
update_tg_cfs_util(cfs_rq, se, gcfs_rq);
|
||||
update_tg_cfs_runnable(cfs_rq, se, gcfs_rq);
|
||||
update_tg_cfs_load(cfs_rq, se, gcfs_rq);
|
||||
|
||||
trace_pelt_cfs_tp(cfs_rq);
|
||||
@@ -3449,7 +3478,7 @@ static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum
|
||||
static inline int
|
||||
update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
|
||||
{
|
||||
unsigned long removed_load = 0, removed_util = 0, removed_runnable_sum = 0;
|
||||
unsigned long removed_load = 0, removed_util = 0, removed_runnable = 0;
|
||||
struct sched_avg *sa = &cfs_rq->avg;
|
||||
int decayed = 0;
|
||||
|
||||
@@ -3460,7 +3489,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
|
||||
raw_spin_lock(&cfs_rq->removed.lock);
|
||||
swap(cfs_rq->removed.util_avg, removed_util);
|
||||
swap(cfs_rq->removed.load_avg, removed_load);
|
||||
swap(cfs_rq->removed.runnable_sum, removed_runnable_sum);
|
||||
swap(cfs_rq->removed.runnable_avg, removed_runnable);
|
||||
cfs_rq->removed.nr = 0;
|
||||
raw_spin_unlock(&cfs_rq->removed.lock);
|
||||
|
||||
@@ -3472,7 +3501,16 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
|
||||
sub_positive(&sa->util_avg, r);
|
||||
sub_positive(&sa->util_sum, r * divider);
|
||||
|
||||
add_tg_cfs_propagate(cfs_rq, -(long)removed_runnable_sum);
|
||||
r = removed_runnable;
|
||||
sub_positive(&sa->runnable_avg, r);
|
||||
sub_positive(&sa->runnable_sum, r * divider);
|
||||
|
||||
/*
|
||||
* removed_runnable is the unweighted version of removed_load so we
|
||||
* can use it to estimate removed_load_sum.
|
||||
*/
|
||||
add_tg_cfs_propagate(cfs_rq,
|
||||
-(long)(removed_runnable * divider) >> SCHED_CAPACITY_SHIFT);
|
||||
|
||||
decayed = 1;
|
||||
}
|
||||
@@ -3517,6 +3555,8 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
|
||||
*/
|
||||
se->avg.util_sum = se->avg.util_avg * divider;
|
||||
|
||||
se->avg.runnable_sum = se->avg.runnable_avg * divider;
|
||||
|
||||
se->avg.load_sum = divider;
|
||||
if (se_weight(se)) {
|
||||
se->avg.load_sum =
|
||||
@@ -3526,6 +3566,8 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
|
||||
enqueue_load_avg(cfs_rq, se);
|
||||
cfs_rq->avg.util_avg += se->avg.util_avg;
|
||||
cfs_rq->avg.util_sum += se->avg.util_sum;
|
||||
cfs_rq->avg.runnable_avg += se->avg.runnable_avg;
|
||||
cfs_rq->avg.runnable_sum += se->avg.runnable_sum;
|
||||
|
||||
add_tg_cfs_propagate(cfs_rq, se->avg.load_sum);
|
||||
|
||||
@@ -3547,6 +3589,8 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
|
||||
dequeue_load_avg(cfs_rq, se);
|
||||
sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg);
|
||||
sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum);
|
||||
sub_positive(&cfs_rq->avg.runnable_avg, se->avg.runnable_avg);
|
||||
sub_positive(&cfs_rq->avg.runnable_sum, se->avg.runnable_sum);
|
||||
|
||||
add_tg_cfs_propagate(cfs_rq, -se->avg.load_sum);
|
||||
|
||||
@@ -3653,10 +3697,15 @@ static void remove_entity_load_avg(struct sched_entity *se)
|
||||
++cfs_rq->removed.nr;
|
||||
cfs_rq->removed.util_avg += se->avg.util_avg;
|
||||
cfs_rq->removed.load_avg += se->avg.load_avg;
|
||||
cfs_rq->removed.runnable_sum += se->avg.load_sum; /* == runnable_sum */
|
||||
cfs_rq->removed.runnable_avg += se->avg.runnable_avg;
|
||||
raw_spin_unlock_irqrestore(&cfs_rq->removed.lock, flags);
|
||||
}
|
||||
|
||||
/* Accessor: return the current avg.runnable_avg value of @cfs_rq. */
static inline unsigned long cfs_rq_runnable_avg(struct cfs_rq *cfs_rq)
|
||||
{
|
||||
return cfs_rq->avg.runnable_avg;
|
||||
}
|
||||
|
||||
static inline unsigned long cfs_rq_load_avg(struct cfs_rq *cfs_rq)
|
||||
{
|
||||
return cfs_rq->avg.load_avg;
|
||||
@@ -3983,11 +4032,13 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
/*
|
||||
* When enqueuing a sched_entity, we must:
|
||||
* - Update loads to have both entity and cfs_rq synced with now.
|
||||
* - Add its load to cfs_rq->runnable_avg
|
||||
* - For group_entity, update its weight to reflect the new share of
|
||||
* its group cfs_rq
|
||||
* - Add its new weight to cfs_rq->load.weight
|
||||
*/
|
||||
update_load_avg(cfs_rq, se, UPDATE_TG | DO_ATTACH);
|
||||
se_update_runnable(se);
|
||||
update_cfs_group(se);
|
||||
account_entity_enqueue(cfs_rq, se);
|
||||
|
||||
@@ -4065,11 +4116,13 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
/*
|
||||
* When dequeuing a sched_entity, we must:
|
||||
* - Update loads to have both entity and cfs_rq synced with now.
|
||||
* - Subtract its load from the cfs_rq->runnable_avg.
|
||||
* - Subtract its previous weight from cfs_rq->load.weight.
|
||||
* - For group entity, update its weight to reflect the new share
|
||||
* of its group cfs_rq.
|
||||
*/
|
||||
update_load_avg(cfs_rq, se, UPDATE_TG);
|
||||
se_update_runnable(se);
|
||||
|
||||
update_stats_dequeue(cfs_rq, se, flags);
|
||||
|
||||
@@ -5240,6 +5293,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
|
||||
goto enqueue_throttle;
|
||||
|
||||
update_load_avg(cfs_rq, se, UPDATE_TG);
|
||||
se_update_runnable(se);
|
||||
update_cfs_group(se);
|
||||
|
||||
cfs_rq->h_nr_running++;
|
||||
@@ -5337,6 +5391,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
|
||||
goto dequeue_throttle;
|
||||
|
||||
update_load_avg(cfs_rq, se, UPDATE_TG);
|
||||
se_update_runnable(se);
|
||||
update_cfs_group(se);
|
||||
|
||||
cfs_rq->h_nr_running--;
|
||||
@@ -5409,6 +5464,11 @@ static unsigned long cpu_load_without(struct rq *rq, struct task_struct *p)
|
||||
return load;
|
||||
}
|
||||
|
||||
/*
 * Return the runnable average of the cfs_rq embedded in @rq (rq->cfs),
 * as reported by cfs_rq_runnable_avg().
 */
static unsigned long cpu_runnable(struct rq *rq)
|
||||
{
|
||||
return cfs_rq_runnable_avg(&rq->cfs);
|
||||
}
|
||||
|
||||
static unsigned long capacity_of(int cpu)
|
||||
{
|
||||
return cpu_rq(cpu)->cpu_capacity;
|
||||
@@ -7554,6 +7614,9 @@ static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
|
||||
if (cfs_rq->avg.util_sum)
|
||||
return false;
|
||||
|
||||
if (cfs_rq->avg.runnable_sum)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user