@@ -283,9 +283,6 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
|
|
|
|
|
return grp->my_q;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq,
|
|
|
|
|
int force_update);
|
|
|
|
|
|
|
|
|
|
static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
|
|
|
|
|
{
|
|
|
|
|
if (!cfs_rq->on_list) {
|
|
|
|
@@ -305,8 +302,6 @@ static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
cfs_rq->on_list = 1;
|
|
|
|
|
/* We should have no load, but we need to update last_decay. */
|
|
|
|
|
update_cfs_rq_blocked_load(cfs_rq, 0);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@@ -664,19 +659,31 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
|
|
|
|
static int select_idle_sibling(struct task_struct *p, int cpu);
|
|
|
|
|
static unsigned long task_h_load(struct task_struct *p);
|
|
|
|
|
|
|
|
|
|
static inline void __update_task_entity_contrib(struct sched_entity *se);
|
|
|
|
|
static inline void __update_task_entity_utilization(struct sched_entity *se);
|
|
|
|
|
/*
|
|
|
|
|
* We choose a half-life close to 1 scheduling period.
|
|
|
|
|
* Note: The tables below are dependent on this value.
|
|
|
|
|
*/
|
|
|
|
|
#define LOAD_AVG_PERIOD 32
|
|
|
|
|
#define LOAD_AVG_MAX 47742 /* maximum possible load avg */
|
|
|
|
|
#define LOAD_AVG_MAX_N 345 /* number of full periods to produce LOAD_AVG_MAX */
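With y chosen so that y^32 = 1/2, an always-runnable task accumulates the geometric series 1024*(1 + y + y^2 + ...) per 1024us period, which saturates near 1024/(1 - y). As a rough standalone illustration (userspace C, not part of the patch; build with -lm), the loop below reproduces that saturation; the kernel's exact constants 47742 and 345 come out slightly lower because decay_load() evaluates the same series with truncating integer arithmetic.

#include <math.h>
#include <stdio.h>

int main(void)
{
	const double y = pow(0.5, 1.0 / 32.0);	/* half-life of 32 periods: y^32 == 1/2 */
	double sum = 0.0;
	int n = 0;

	/* Accumulate 1024 per 1024us period, decaying the history by y each
	 * time, until one more period no longer changes the integer total. */
	while (sum * y + 1024.0 - sum >= 0.5) {
		sum = sum * y + 1024.0;
		n++;
	}
	printf("saturates after %d periods at ~%.0f\n", n, sum);
	return 0;
}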
|
|
|
|
|
|
|
|
|
|
/* Give a new task initial runnable values that weight its load heavily in its infancy */
|
|
|
|
|
void init_task_runnable_average(struct task_struct *p)
|
|
|
|
|
{
|
|
|
|
|
u32 slice;
|
|
|
|
|
struct sched_avg *sa = &p->se.avg;
|
|
|
|
|
|
|
|
|
|
slice = sched_slice(task_cfs_rq(p), &p->se) >> 10;
|
|
|
|
|
p->se.avg.runnable_avg_sum = p->se.avg.running_avg_sum = slice;
|
|
|
|
|
p->se.avg.avg_period = slice;
|
|
|
|
|
__update_task_entity_contrib(&p->se);
|
|
|
|
|
__update_task_entity_utilization(&p->se);
|
|
|
|
|
sa->last_update_time = 0;
|
|
|
|
|
/*
|
|
|
|
|
* sched_avg's period_contrib should be strictly less than 1024, so
|
|
|
|
|
* we give it 1023 to make sure it is almost a period (1024us), and
|
|
|
|
|
* will definitely be updated (after enqueue).
|
|
|
|
|
*/
|
|
|
|
|
sa->period_contrib = 1023;
|
|
|
|
|
sa->load_avg = scale_load_down(p->se.load.weight);
|
|
|
|
|
sa->load_sum = sa->load_avg * LOAD_AVG_MAX;
|
|
|
|
|
sa->util_avg = scale_load_down(SCHED_LOAD_SCALE);
|
|
|
|
|
sa->util_sum = LOAD_AVG_MAX;
|
|
|
|
|
/* when this task is enqueued, it will contribute to its cfs_rq's load_avg */
|
|
|
|
|
}
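The intent of the initialization above is that a freshly forked task looks maximally loaded until real history accumulates: load_sum and util_sum are seeded so that dividing by LOAD_AVG_MAX reproduces load_avg and util_avg exactly, mirroring the divisions at the end of the new __update_load_avg(). A minimal userspace check of that invariant, assuming SCHED_LOAD_SHIFT is 10 (i.e. no increased load resolution) -- a sketch, not part of the patch:

#include <assert.h>

#define LOAD_AVG_MAX		47742
#define SCHED_LOAD_SHIFT	10	/* assumed: no increased load resolution */
#define SCHED_LOAD_SCALE	(1UL << SCHED_LOAD_SHIFT)

int main(void)
{
	unsigned long load_avg = 1024;	/* scale_load_down() of a nice-0 weight */
	unsigned long long load_sum = (unsigned long long)load_avg * LOAD_AVG_MAX;
	unsigned long util_avg = SCHED_LOAD_SCALE;
	unsigned long long util_sum = LOAD_AVG_MAX;

	/* The same divisions __update_load_avg() performs once 'decayed' is set. */
	assert(load_sum / LOAD_AVG_MAX == load_avg);
	assert((util_sum << SCHED_LOAD_SHIFT) / LOAD_AVG_MAX == util_avg);
	return 0;
}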
|
|
|
|
|
#else
|
|
|
|
|
void init_task_runnable_average(struct task_struct *p)
|
|
|
|
@@ -1698,8 +1705,8 @@ static u64 numa_get_avg_runtime(struct task_struct *p, u64 *period)
|
|
|
|
|
delta = runtime - p->last_sum_exec_runtime;
|
|
|
|
|
*period = now - p->last_task_numa_placement;
|
|
|
|
|
} else {
|
|
|
|
|
delta = p->se.avg.runnable_avg_sum;
|
|
|
|
|
*period = p->se.avg.avg_period;
|
|
|
|
|
delta = p->se.avg.load_sum / p->se.load.weight;
|
|
|
|
|
*period = LOAD_AVG_MAX;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
p->last_sum_exec_runtime = runtime;
|
|
|
|
@@ -2347,13 +2354,13 @@ static inline long calc_tg_weight(struct task_group *tg, struct cfs_rq *cfs_rq)
|
|
|
|
|
long tg_weight;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Use this CPU's actual weight instead of the last load_contribution
|
|
|
|
|
* to gain a more accurate current total weight. See
|
|
|
|
|
* __update_cfs_rq_tg_load_contrib().
|
|
|
|
|
* Use this CPU's real-time load instead of the last load contribution
|
|
|
|
|
* as the updating of the contribution is delayed, and we will use the
|
|
|
|
|
* real-time load to calc the share. See update_tg_load_avg().
|
|
|
|
|
*/
|
|
|
|
|
tg_weight = atomic_long_read(&tg->load_avg);
|
|
|
|
|
tg_weight -= cfs_rq->tg_load_contrib;
|
|
|
|
|
tg_weight += cfs_rq->load.weight;
|
|
|
|
|
tg_weight -= cfs_rq->tg_load_avg_contrib;
|
|
|
|
|
tg_weight += cfs_rq->avg.load_avg;
|
|
|
|
|
|
|
|
|
|
return tg_weight;
|
|
|
|
|
}
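The net effect of the three statements above is to estimate the group's current total weight: take the globally aggregated tg->load_avg, subtract what this cfs_rq last published into it, and substitute the up-to-date local avg.load_avg. A simplified model of that estimate and of the share computation in the following calc_cfs_shares() hunk; the demo_* types and names are invented for illustration, and the real function additionally clamps the result to [MIN_SHARES, tg->shares].

/* Invented, simplified types -- just enough to show the arithmetic. */
struct demo_tg {
	long load_avg;			/* sum of all CPUs' published contributions */
	long shares;
};

struct demo_cfs_rq {
	long avg_load_avg;		/* this CPU's current group load */
	long tg_load_avg_contrib;	/* what this CPU last published */
};

static long demo_tg_weight(const struct demo_tg *tg, const struct demo_cfs_rq *cfs_rq)
{
	/* Global sum, with our possibly stale contribution swapped for the
	 * fresh local value. */
	return tg->load_avg - cfs_rq->tg_load_avg_contrib + cfs_rq->avg_load_avg;
}

static long demo_cfs_shares(const struct demo_tg *tg, const struct demo_cfs_rq *cfs_rq)
{
	long tg_weight = demo_tg_weight(tg, cfs_rq);
	long shares = tg->shares * cfs_rq->avg_load_avg;

	/* Each CPU receives a slice of tg->shares proportional to its share
	 * of the group's estimated total load. */
	return tg_weight ? shares / tg_weight : tg->shares;
}

int main(void)
{
	struct demo_tg tg = { .load_avg = 3000, .shares = 1024 };
	struct demo_cfs_rq local = { .avg_load_avg = 1500, .tg_load_avg_contrib = 1000 };

	/* estimated weight: 3000 - 1000 + 1500 = 3500; share: 1024 * 1500 / 3500 = 438 */
	return demo_cfs_shares(&tg, &local) == 438 ? 0 : 1;
}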
|
|
|
|
@@ -2363,7 +2370,7 @@ static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
|
|
|
|
|
long tg_weight, load, shares;
|
|
|
|
|
|
|
|
|
|
tg_weight = calc_tg_weight(tg, cfs_rq);
|
|
|
|
|
load = cfs_rq->load.weight;
|
|
|
|
|
load = cfs_rq->avg.load_avg;
|
|
|
|
|
|
|
|
|
|
shares = (tg->shares * load);
|
|
|
|
|
if (tg_weight)
|
|
|
|
@@ -2425,14 +2432,6 @@ static inline void update_cfs_shares(struct cfs_rq *cfs_rq)
|
|
|
|
|
#endif /* CONFIG_FAIR_GROUP_SCHED */
|
|
|
|
|
|
|
|
|
|
#ifdef CONFIG_SMP
|
|
|
|
|
/*
|
|
|
|
|
* We choose a half-life close to 1 scheduling period.
|
|
|
|
|
* Note: The tables below are dependent on this value.
|
|
|
|
|
*/
|
|
|
|
|
#define LOAD_AVG_PERIOD 32
|
|
|
|
|
#define LOAD_AVG_MAX 47742 /* maximum possible load avg */
|
|
|
|
|
#define LOAD_AVG_MAX_N 345 /* number of full periods to produce LOAD_MAX_AVG */
|
|
|
|
|
|
|
|
|
|
/* Precomputed fixed inverse multiplies for multiplication by y^n */
|
|
|
|
|
static const u32 runnable_avg_yN_inv[] = {
|
|
|
|
|
0xffffffff, 0xfa83b2da, 0xf5257d14, 0xefe4b99a, 0xeac0c6e6, 0xe5b906e6,
|
|
|
|
@@ -2481,9 +2480,8 @@ static __always_inline u64 decay_load(u64 val, u64 n)
|
|
|
|
|
local_n %= LOAD_AVG_PERIOD;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
val *= runnable_avg_yN_inv[local_n];
|
|
|
|
|
/* We don't use SRR here since we always want to round down. */
|
|
|
|
|
return val >> 32;
|
|
|
|
|
val = mul_u64_u32_shr(val, runnable_avg_yN_inv[local_n], 32);
|
|
|
|
|
return val;
|
|
|
|
|
}
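decay_load() evaluates val * y^n in fixed point: whole multiples of LOAD_AVG_PERIOD are exact halvings (since y^32 = 1/2), and the remaining 0..31 steps become a 32.32 fixed-point multiply by the precomputed inverse, which is what the new mul_u64_u32_shr() call expresses. Below is a rough userspace rendition -- a sketch, not the kernel implementation -- carrying only the six table entries shown above; the kernel helper additionally copes with products that would overflow 64 bits.

#include <stdint.h>
#include <stdio.h>

#define LOAD_AVG_PERIOD 32

/* First entries of runnable_avg_yN_inv[], i.e. round(2^32 * y^n). */
static const uint32_t yN_inv[] = {
	0xffffffff, 0xfa83b2da, 0xf5257d14, 0xefe4b99a, 0xeac0c6e6, 0xe5b906e6,
};

static uint64_t demo_decay_load(uint64_t val, uint64_t n)
{
	/* Every whole LOAD_AVG_PERIOD is an exact halving, since y^32 == 1/2. */
	val >>= n / LOAD_AVG_PERIOD;
	n %= LOAD_AVG_PERIOD;

	if (n >= sizeof(yN_inv) / sizeof(yN_inv[0]))
		return 0;	/* this sketch only carries six table entries */

	/* The 32.32 fixed-point multiply done by mul_u64_u32_shr(val, inv, 32);
	 * fine here because val is small enough not to overflow the product. */
	return (val * (uint64_t)yN_inv[n]) >> 32;
}

int main(void)
{
	/* 47742 decayed by one period, i.e. multiplied by y ~= 0.9786 */
	printf("%llu\n", (unsigned long long)demo_decay_load(47742, 1));
	return 0;
}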
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
@@ -2542,23 +2540,22 @@ static u32 __compute_runnable_contrib(u64 n)
|
|
|
|
|
* load_avg = u_0` + y*(u_0 + u_1*y + u_2*y^2 + ... )
|
|
|
|
|
* = u_0 + u_1*y + u_2*y^2 + ... [re-labeling u_i --> u_{i+1}]
|
|
|
|
|
*/
|
|
|
|
|
static __always_inline int __update_entity_runnable_avg(u64 now, int cpu,
|
|
|
|
|
struct sched_avg *sa,
|
|
|
|
|
int runnable,
|
|
|
|
|
int running)
|
|
|
|
|
static __always_inline int
|
|
|
|
|
__update_load_avg(u64 now, int cpu, struct sched_avg *sa,
|
|
|
|
|
unsigned long weight, int running)
|
|
|
|
|
{
|
|
|
|
|
u64 delta, periods;
|
|
|
|
|
u32 runnable_contrib;
|
|
|
|
|
u32 contrib;
|
|
|
|
|
int delta_w, decayed = 0;
|
|
|
|
|
unsigned long scale_freq = arch_scale_freq_capacity(NULL, cpu);
|
|
|
|
|
|
|
|
|
|
delta = now - sa->last_runnable_update;
|
|
|
|
|
delta = now - sa->last_update_time;
|
|
|
|
|
/*
|
|
|
|
|
* This should only happen when time goes backwards, which it
|
|
|
|
|
* unfortunately does during sched clock init when we swap over to TSC.
|
|
|
|
|
*/
|
|
|
|
|
if ((s64)delta < 0) {
|
|
|
|
|
sa->last_runnable_update = now;
|
|
|
|
|
sa->last_update_time = now;
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@@ -2569,26 +2566,26 @@ static __always_inline int __update_entity_runnable_avg(u64 now, int cpu,
|
|
|
|
|
delta >>= 10;
|
|
|
|
|
if (!delta)
|
|
|
|
|
return 0;
|
|
|
|
|
sa->last_runnable_update = now;
|
|
|
|
|
sa->last_update_time = now;
|
|
|
|
|
|
|
|
|
|
/* delta_w is the amount already accumulated against our next period */
|
|
|
|
|
delta_w = sa->avg_period % 1024;
|
|
|
|
|
delta_w = sa->period_contrib;
|
|
|
|
|
if (delta + delta_w >= 1024) {
|
|
|
|
|
/* period roll-over */
|
|
|
|
|
decayed = 1;
|
|
|
|
|
|
|
|
|
|
/* whatever is left for the next period starts over; we don't know it yet */
|
|
|
|
|
sa->period_contrib = 0;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Now that we know we're crossing a period boundary, figure
|
|
|
|
|
* out how much from delta we need to complete the current
|
|
|
|
|
* period and accrue it.
|
|
|
|
|
*/
|
|
|
|
|
delta_w = 1024 - delta_w;
|
|
|
|
|
if (runnable)
|
|
|
|
|
sa->runnable_avg_sum += delta_w;
|
|
|
|
|
if (weight)
|
|
|
|
|
sa->load_sum += weight * delta_w;
|
|
|
|
|
if (running)
|
|
|
|
|
sa->running_avg_sum += delta_w * scale_freq
|
|
|
|
|
>> SCHED_CAPACITY_SHIFT;
|
|
|
|
|
sa->avg_period += delta_w;
|
|
|
|
|
sa->util_sum += delta_w * scale_freq >> SCHED_CAPACITY_SHIFT;
|
|
|
|
|
|
|
|
|
|
delta -= delta_w;
|
|
|
|
|
|
|
|
|
@@ -2596,334 +2593,156 @@ static __always_inline int __update_entity_runnable_avg(u64 now, int cpu,
|
|
|
|
|
periods = delta / 1024;
|
|
|
|
|
delta %= 1024;
|
|
|
|
|
|
|
|
|
|
sa->runnable_avg_sum = decay_load(sa->runnable_avg_sum,
|
|
|
|
|
periods + 1);
|
|
|
|
|
sa->running_avg_sum = decay_load(sa->running_avg_sum,
|
|
|
|
|
periods + 1);
|
|
|
|
|
sa->avg_period = decay_load(sa->avg_period,
|
|
|
|
|
periods + 1);
|
|
|
|
|
sa->load_sum = decay_load(sa->load_sum, periods + 1);
|
|
|
|
|
sa->util_sum = decay_load((u64)(sa->util_sum), periods + 1);
|
|
|
|
|
|
|
|
|
|
/* Efficiently calculate \sum (1..n_period) 1024*y^i */
|
|
|
|
|
runnable_contrib = __compute_runnable_contrib(periods);
|
|
|
|
|
if (runnable)
|
|
|
|
|
sa->runnable_avg_sum += runnable_contrib;
|
|
|
|
|
contrib = __compute_runnable_contrib(periods);
|
|
|
|
|
if (weight)
|
|
|
|
|
sa->load_sum += weight * contrib;
|
|
|
|
|
if (running)
|
|
|
|
|
sa->running_avg_sum += runnable_contrib * scale_freq
|
|
|
|
|
>> SCHED_CAPACITY_SHIFT;
|
|
|
|
|
sa->avg_period += runnable_contrib;
|
|
|
|
|
sa->util_sum += contrib * scale_freq >> SCHED_CAPACITY_SHIFT;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Remainder of delta accrued against u_0` */
|
|
|
|
|
if (runnable)
|
|
|
|
|
sa->runnable_avg_sum += delta;
|
|
|
|
|
if (weight)
|
|
|
|
|
sa->load_sum += weight * delta;
|
|
|
|
|
if (running)
|
|
|
|
|
sa->running_avg_sum += delta * scale_freq
|
|
|
|
|
>> SCHED_CAPACITY_SHIFT;
|
|
|
|
|
sa->avg_period += delta;
|
|
|
|
|
sa->util_sum += delta * scale_freq >> SCHED_CAPACITY_SHIFT;
|
|
|
|
|
|
|
|
|
|
sa->period_contrib += delta;
|
|
|
|
|
|
|
|
|
|
if (decayed) {
|
|
|
|
|
sa->load_avg = div_u64(sa->load_sum, LOAD_AVG_MAX);
|
|
|
|
|
sa->util_avg = (sa->util_sum << SCHED_LOAD_SHIFT) / LOAD_AVG_MAX;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return decayed;
|
|
|
|
|
}
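Both the old and the new accumulator follow the same three-part scheme whenever a 1024us boundary is crossed: top up the partially filled current period, decay everything by the number of whole periods that elapsed and add their closed-form contribution, then start accruing the remainder into the new open period. The floating-point model below is only a sketch of the new __update_load_avg() (it assumes the frequency scaling factor is unity and uses exact arithmetic instead of decay_load()'s fixed point), but it mirrors the order of operations; build with -lm.

#include <math.h>
#include <stdio.h>

#define LOAD_AVG_MAX 47742.0

struct demo_avg {
	double load_sum, util_sum;
	double load_avg, util_avg;
	unsigned int period_contrib;	/* microseconds accrued in the open period */
};

/* One update covering delta_us microseconds, with the given weight while
 * runnable and running != 0 if the entity actually occupied the CPU. */
static void demo_update(struct demo_avg *sa, unsigned int delta_us,
			double weight, int running)
{
	const double y = pow(0.5, 1.0 / 32.0);
	unsigned int delta_w = sa->period_contrib;
	int decayed = 0;

	if (delta_us + delta_w >= 1024) {
		unsigned int head = 1024 - delta_w;	/* finish the open period */
		unsigned int periods;

		sa->load_sum += weight * head;
		sa->util_sum += running ? head : 0;
		delta_us -= head;

		periods = delta_us / 1024;		/* whole periods elapsed */
		delta_us %= 1024;

		/* age the history, then add the closed periods' series
		 * (what __compute_runnable_contrib() returns) */
		sa->load_sum *= pow(y, periods + 1);
		sa->util_sum *= pow(y, periods + 1);
		sa->load_sum += weight * 1024.0 * y * (1 - pow(y, periods)) / (1 - y);
		sa->util_sum += (running ? 1024.0 : 0) * y * (1 - pow(y, periods)) / (1 - y);

		sa->period_contrib = 0;
		decayed = 1;
	}

	/* the remainder accrues, undecayed, against the new open period */
	sa->load_sum += weight * delta_us;
	sa->util_sum += running ? delta_us : 0;
	sa->period_contrib += delta_us;

	if (decayed) {
		sa->load_avg = sa->load_sum / LOAD_AVG_MAX;
		sa->util_avg = sa->util_sum * 1024.0 / LOAD_AVG_MAX;
	}
}

int main(void)
{
	struct demo_avg sa = { 0 };
	int i;

	/* A task 100% runnable and running for 100ms, updated every 1024us. */
	for (i = 0; i < 100; i++)
		demo_update(&sa, 1024, 1024.0, 1);
	printf("load_avg=%.0f util_avg=%.0f\n", sa.load_avg, sa.util_avg);
	return 0;
}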
|
|
|
|
|
|
|
|
|
|
/* Synchronize an entity's decay with its parenting cfs_rq.*/
|
|
|
|
|
static inline u64 __synchronize_entity_decay(struct sched_entity *se)
|
|
|
|
|
{
|
|
|
|
|
struct cfs_rq *cfs_rq = cfs_rq_of(se);
|
|
|
|
|
u64 decays = atomic64_read(&cfs_rq->decay_counter);
|
|
|
|
|
|
|
|
|
|
decays -= se->avg.decay_count;
|
|
|
|
|
se->avg.decay_count = 0;
|
|
|
|
|
if (!decays)
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
|
|
se->avg.load_avg_contrib = decay_load(se->avg.load_avg_contrib, decays);
|
|
|
|
|
se->avg.utilization_avg_contrib =
|
|
|
|
|
decay_load(se->avg.utilization_avg_contrib, decays);
|
|
|
|
|
|
|
|
|
|
return decays;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#ifdef CONFIG_FAIR_GROUP_SCHED
|
|
|
|
|
static inline void __update_cfs_rq_tg_load_contrib(struct cfs_rq *cfs_rq,
|
|
|
|
|
int force_update)
|
|
|
|
|
{
|
|
|
|
|
struct task_group *tg = cfs_rq->tg;
|
|
|
|
|
long tg_contrib;
|
|
|
|
|
|
|
|
|
|
tg_contrib = cfs_rq->runnable_load_avg + cfs_rq->blocked_load_avg;
|
|
|
|
|
tg_contrib -= cfs_rq->tg_load_contrib;
|
|
|
|
|
|
|
|
|
|
if (!tg_contrib)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
if (force_update || abs(tg_contrib) > cfs_rq->tg_load_contrib / 8) {
|
|
|
|
|
atomic_long_add(tg_contrib, &tg->load_avg);
|
|
|
|
|
cfs_rq->tg_load_contrib += tg_contrib;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Aggregate cfs_rq runnable averages into an equivalent task_group
|
|
|
|
|
* representation for computing load contributions.
|
|
|
|
|
* Updating tg's load_avg is necessary before update_cfs_share (which is done)
|
|
|
|
|
* and effective_load (which is not done because it is too costly).
|
|
|
|
|
*/
|
|
|
|
|
static inline void __update_tg_runnable_avg(struct sched_avg *sa,
|
|
|
|
|
struct cfs_rq *cfs_rq)
|
|
|
|
|
static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force)
|
|
|
|
|
{
|
|
|
|
|
struct task_group *tg = cfs_rq->tg;
|
|
|
|
|
long contrib;
|
|
|
|
|
long delta = cfs_rq->avg.load_avg - cfs_rq->tg_load_avg_contrib;
|
|
|
|
|
|
|
|
|
|
/* The fraction of a cpu used by this cfs_rq */
|
|
|
|
|
contrib = div_u64((u64)sa->runnable_avg_sum << NICE_0_SHIFT,
|
|
|
|
|
sa->avg_period + 1);
|
|
|
|
|
contrib -= cfs_rq->tg_runnable_contrib;
|
|
|
|
|
|
|
|
|
|
if (abs(contrib) > cfs_rq->tg_runnable_contrib / 64) {
|
|
|
|
|
atomic_add(contrib, &tg->runnable_avg);
|
|
|
|
|
cfs_rq->tg_runnable_contrib += contrib;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline void __update_group_entity_contrib(struct sched_entity *se)
|
|
|
|
|
{
|
|
|
|
|
struct cfs_rq *cfs_rq = group_cfs_rq(se);
|
|
|
|
|
struct task_group *tg = cfs_rq->tg;
|
|
|
|
|
int runnable_avg;
|
|
|
|
|
|
|
|
|
|
u64 contrib;
|
|
|
|
|
|
|
|
|
|
contrib = cfs_rq->tg_load_contrib * tg->shares;
|
|
|
|
|
se->avg.load_avg_contrib = div_u64(contrib,
|
|
|
|
|
atomic_long_read(&tg->load_avg) + 1);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* For group entities we need to compute a correction term in the case
|
|
|
|
|
* that they are consuming <1 cpu so that we would contribute the same
|
|
|
|
|
* load as a task of equal weight.
|
|
|
|
|
*
|
|
|
|
|
* Explicitly co-ordinating this measurement would be expensive, but
|
|
|
|
|
* fortunately the sum of each cpus contribution forms a usable
|
|
|
|
|
* lower-bound on the true value.
|
|
|
|
|
*
|
|
|
|
|
* Consider the aggregate of 2 contributions. Either they are disjoint
|
|
|
|
|
* (and the sum represents the true value) or they overlap and we are
|
|
|
|
|
* understating by the aggregate of their overlap.
|
|
|
|
|
*
|
|
|
|
|
* Extending this to N cpus, for a given overlap, the maximum amount we
|
|
|
|
|
* understate is then n_i(n_i+1)/2 * w_i where n_i is the number of
|
|
|
|
|
* cpus that overlap for this interval and w_i is the interval width.
|
|
|
|
|
*
|
|
|
|
|
* On a small machine; the first term is well-bounded which bounds the
|
|
|
|
|
* total error since w_i is a subset of the period. Whereas on a
|
|
|
|
|
* larger machine, while this first term can be larger, if w_i is of
|
|
|
|
|
* consequential size it is guaranteed to see n_i*w_i quickly converge to
|
|
|
|
|
* our upper bound of 1-cpu.
|
|
|
|
|
*/
|
|
|
|
|
runnable_avg = atomic_read(&tg->runnable_avg);
|
|
|
|
|
if (runnable_avg < NICE_0_LOAD) {
|
|
|
|
|
se->avg.load_avg_contrib *= runnable_avg;
|
|
|
|
|
se->avg.load_avg_contrib >>= NICE_0_SHIFT;
|
|
|
|
|
if (force || abs(delta) > cfs_rq->tg_load_avg_contrib / 64) {
|
|
|
|
|
atomic_long_add(delta, &cfs_rq->tg->load_avg);
|
|
|
|
|
cfs_rq->tg_load_avg_contrib = cfs_rq->avg.load_avg;
|
|
|
|
|
}
|
|
|
|
|
}
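tg->load_avg is a single atomic shared by every CPU in the task group, so the new update_tg_load_avg() only touches it when the local contribution has drifted by more than 1/64 (roughly 1.5%) of what was last published, or when forced. A compact userspace model of that rate limit follows; the demo_* types and names are invented for illustration.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

/* Invented, simplified stand-ins for cfs_rq/task_group fields. */
struct demo_cfs {
	long load_avg;			/* current local average */
	long tg_load_avg_contrib;	/* what we last folded into the group sum */
	atomic_long *tg_load_avg;	/* shared, per-task-group total */
};

static void demo_update_tg_load_avg(struct demo_cfs *cfs, int force)
{
	long delta = cfs->load_avg - cfs->tg_load_avg_contrib;

	/* Skip the cross-CPU atomic unless the local picture moved by more
	 * than ~1.5% of what was last published, limiting cacheline bouncing. */
	if (force || labs(delta) > cfs->tg_load_avg_contrib / 64) {
		atomic_fetch_add(cfs->tg_load_avg, delta);
		cfs->tg_load_avg_contrib = cfs->load_avg;
	}
}

int main(void)
{
	atomic_long shared = 0;
	struct demo_cfs cfs = {
		.load_avg = 2100,		/* drifted ~2.5% above ... */
		.tg_load_avg_contrib = 2048,	/* ... the published value */
		.tg_load_avg = &shared,
	};

	demo_update_tg_load_avg(&cfs, 0);
	printf("%ld\n", atomic_load(&shared));	/* prints 52 */
	return 0;
}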
|
|
|
|
|
|
|
|
|
|
#else /* CONFIG_FAIR_GROUP_SCHED */
|
|
|
|
|
static inline void __update_cfs_rq_tg_load_contrib(struct cfs_rq *cfs_rq,
|
|
|
|
|
int force_update) {}
|
|
|
|
|
static inline void __update_tg_runnable_avg(struct sched_avg *sa,
|
|
|
|
|
struct cfs_rq *cfs_rq) {}
|
|
|
|
|
static inline void __update_group_entity_contrib(struct sched_entity *se) {}
|
|
|
|
|
static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}
|
|
|
|
|
#endif /* CONFIG_FAIR_GROUP_SCHED */
|
|
|
|
|
|
|
|
|
|
static inline void __update_task_entity_contrib(struct sched_entity *se)
|
|
|
|
|
{
|
|
|
|
|
u32 contrib;
|
|
|
|
|
|
|
|
|
|
/* avoid overflowing a 32-bit type w/ SCHED_LOAD_SCALE */
|
|
|
|
|
contrib = se->avg.runnable_avg_sum * scale_load_down(se->load.weight);
|
|
|
|
|
contrib /= (se->avg.avg_period + 1);
|
|
|
|
|
se->avg.load_avg_contrib = scale_load(contrib);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Compute the current contribution to load_avg by se, return any delta */
|
|
|
|
|
static long __update_entity_load_avg_contrib(struct sched_entity *se)
|
|
|
|
|
{
|
|
|
|
|
long old_contrib = se->avg.load_avg_contrib;
|
|
|
|
|
|
|
|
|
|
if (entity_is_task(se)) {
|
|
|
|
|
__update_task_entity_contrib(se);
|
|
|
|
|
} else {
|
|
|
|
|
__update_tg_runnable_avg(&se->avg, group_cfs_rq(se));
|
|
|
|
|
__update_group_entity_contrib(se);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return se->avg.load_avg_contrib - old_contrib;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static inline void __update_task_entity_utilization(struct sched_entity *se)
|
|
|
|
|
{
|
|
|
|
|
u32 contrib;
|
|
|
|
|
|
|
|
|
|
/* avoid overflowing a 32-bit type w/ SCHED_LOAD_SCALE */
|
|
|
|
|
contrib = se->avg.running_avg_sum * scale_load_down(SCHED_LOAD_SCALE);
|
|
|
|
|
contrib /= (se->avg.avg_period + 1);
|
|
|
|
|
se->avg.utilization_avg_contrib = scale_load(contrib);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static long __update_entity_utilization_avg_contrib(struct sched_entity *se)
|
|
|
|
|
{
|
|
|
|
|
long old_contrib = se->avg.utilization_avg_contrib;
|
|
|
|
|
|
|
|
|
|
if (entity_is_task(se))
|
|
|
|
|
__update_task_entity_utilization(se);
|
|
|
|
|
else
|
|
|
|
|
se->avg.utilization_avg_contrib =
|
|
|
|
|
group_cfs_rq(se)->utilization_load_avg;
|
|
|
|
|
|
|
|
|
|
return se->avg.utilization_avg_contrib - old_contrib;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline void subtract_blocked_load_contrib(struct cfs_rq *cfs_rq,
|
|
|
|
|
long load_contrib)
|
|
|
|
|
{
|
|
|
|
|
if (likely(load_contrib < cfs_rq->blocked_load_avg))
|
|
|
|
|
cfs_rq->blocked_load_avg -= load_contrib;
|
|
|
|
|
else
|
|
|
|
|
cfs_rq->blocked_load_avg = 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);
|
|
|
|
|
|
|
|
|
|
/* Update a sched_entity's runnable average */
|
|
|
|
|
static inline void update_entity_load_avg(struct sched_entity *se,
|
|
|
|
|
int update_cfs_rq)
|
|
|
|
|
/* Group cfs_rq's load_avg is used for task_h_load and update_cfs_share */
|
|
|
|
|
static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
|
|
|
|
|
{
|
|
|
|
|
int decayed;
|
|
|
|
|
struct sched_avg *sa = &cfs_rq->avg;
|
|
|
|
|
|
|
|
|
|
if (atomic_long_read(&cfs_rq->removed_load_avg)) {
|
|
|
|
|
long r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0);
|
|
|
|
|
sa->load_avg = max_t(long, sa->load_avg - r, 0);
|
|
|
|
|
sa->load_sum = max_t(s64, sa->load_sum - r * LOAD_AVG_MAX, 0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (atomic_long_read(&cfs_rq->removed_util_avg)) {
|
|
|
|
|
long r = atomic_long_xchg(&cfs_rq->removed_util_avg, 0);
|
|
|
|
|
sa->util_avg = max_t(long, sa->util_avg - r, 0);
|
|
|
|
|
sa->util_sum = max_t(s32, sa->util_sum -
|
|
|
|
|
((r * LOAD_AVG_MAX) >> SCHED_LOAD_SHIFT), 0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
decayed = __update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa,
|
|
|
|
|
scale_load_down(cfs_rq->load.weight), cfs_rq->curr != NULL);
|
|
|
|
|
|
|
|
|
|
#ifndef CONFIG_64BIT
|
|
|
|
|
smp_wmb();
|
|
|
|
|
cfs_rq->load_last_update_time_copy = sa->last_update_time;
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
return decayed;
|
|
|
|
|
}
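The removed_load_avg/removed_util_avg handling above exists because a task can leave this cfs_rq from another CPU (a wakeup migration) without taking this rq's lock: the remote side merely adds the departing averages into the atomics, and the next locked update drains them here, clamping at zero so a removal that races with decay cannot drive the averages negative. A minimal single-threaded model of the drain step, with invented names, is sketched below.

#include <stdatomic.h>
#include <stdio.h>

#define LOAD_AVG_MAX 47742

/* Invented, simplified stand-ins for the cfs_rq averages. */
struct demo_cfs_avg {
	long load_avg;
	long long load_sum;
	atomic_long removed_load_avg;	/* filled by remote (unlocked) dequeuers */
};

static void demo_drain_removed(struct demo_cfs_avg *avg)
{
	if (atomic_load(&avg->removed_load_avg)) {
		/* Take ownership of everything queued so far. */
		long r = atomic_exchange(&avg->removed_load_avg, 0);

		/* Clamp at zero: the queued removal may describe load that
		 * has meanwhile decayed out of the local sums. */
		avg->load_avg = avg->load_avg > r ? avg->load_avg - r : 0;
		avg->load_sum = avg->load_sum > (long long)r * LOAD_AVG_MAX ?
				avg->load_sum - (long long)r * LOAD_AVG_MAX : 0;
	}
}

int main(void)
{
	struct demo_cfs_avg avg = { .load_avg = 300,
				    .load_sum = 300LL * LOAD_AVG_MAX };

	/* Pretend a remote CPU just removed a migrating task worth 512. */
	atomic_store(&avg.removed_load_avg, 512);
	demo_drain_removed(&avg);
	printf("%ld %lld\n", avg.load_avg, avg.load_sum);	/* 0 0 */
	return 0;
}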
|
|
|
|
|
|
|
|
|
|
/* Update task and its cfs_rq load average */
|
|
|
|
|
static inline void update_load_avg(struct sched_entity *se, int update_tg)
|
|
|
|
|
{
|
|
|
|
|
struct cfs_rq *cfs_rq = cfs_rq_of(se);
|
|
|
|
|
long contrib_delta, utilization_delta;
|
|
|
|
|
int cpu = cpu_of(rq_of(cfs_rq));
|
|
|
|
|
u64 now;
|
|
|
|
|
u64 now = cfs_rq_clock_task(cfs_rq);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* For a group entity we need to use their owned cfs_rq_clock_task() in
|
|
|
|
|
* case they are the parent of a throttled hierarchy.
|
|
|
|
|
* Track task load average for carrying it to the new CPU after migration, and
|
|
|
|
|
* track group sched_entity load average for task_h_load calc in migration
|
|
|
|
|
*/
|
|
|
|
|
if (entity_is_task(se))
|
|
|
|
|
now = cfs_rq_clock_task(cfs_rq);
|
|
|
|
|
else
|
|
|
|
|
now = cfs_rq_clock_task(group_cfs_rq(se));
|
|
|
|
|
__update_load_avg(now, cpu, &se->avg,
|
|
|
|
|
se->on_rq * scale_load_down(se->load.weight), cfs_rq->curr == se);
|
|
|
|
|
|
|
|
|
|
if (!__update_entity_runnable_avg(now, cpu, &se->avg, se->on_rq,
|
|
|
|
|
cfs_rq->curr == se))
|
|
|
|
|
return;
|
|
|
|
|
if (update_cfs_rq_load_avg(now, cfs_rq) && update_tg)
|
|
|
|
|
update_tg_load_avg(cfs_rq, 0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
contrib_delta = __update_entity_load_avg_contrib(se);
|
|
|
|
|
utilization_delta = __update_entity_utilization_avg_contrib(se);
|
|
|
|
|
/* Add the load generated by se into cfs_rq's load average */
|
|
|
|
|
static inline void
|
|
|
|
|
enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
|
|
|
|
{
|
|
|
|
|
struct sched_avg *sa = &se->avg;
|
|
|
|
|
u64 now = cfs_rq_clock_task(cfs_rq);
|
|
|
|
|
int migrated = 0, decayed;
|
|
|
|
|
|
|
|
|
|
if (!update_cfs_rq)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
if (se->on_rq) {
|
|
|
|
|
cfs_rq->runnable_load_avg += contrib_delta;
|
|
|
|
|
cfs_rq->utilization_load_avg += utilization_delta;
|
|
|
|
|
} else {
|
|
|
|
|
subtract_blocked_load_contrib(cfs_rq, -contrib_delta);
|
|
|
|
|
if (sa->last_update_time == 0) {
|
|
|
|
|
sa->last_update_time = now;
|
|
|
|
|
migrated = 1;
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
__update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa,
|
|
|
|
|
se->on_rq * scale_load_down(se->load.weight), cfs_rq->curr == se);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
decayed = update_cfs_rq_load_avg(now, cfs_rq);
|
|
|
|
|
|
|
|
|
|
if (migrated) {
|
|
|
|
|
cfs_rq->avg.load_avg += sa->load_avg;
|
|
|
|
|
cfs_rq->avg.load_sum += sa->load_sum;
|
|
|
|
|
cfs_rq->avg.util_avg += sa->util_avg;
|
|
|
|
|
cfs_rq->avg.util_sum += sa->util_sum;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (decayed || migrated)
|
|
|
|
|
update_tg_load_avg(cfs_rq, 0);
|
|
|
|
|
}
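The migrated path above leans on last_update_time == 0 as an "entity just arrived" marker: migrate_task_rq_fair() (and the fork path, via init_task_runnable_average()) clears it, so enqueue skips the catch-up decay that would need the old CPU's clock, stamps the entity with this cfs_rq's time base, and attaches its carried averages wholesale. A small sketch of that decision, using invented simplified types:

/* Invented, simplified types sketching the decision above. */
struct demo_se_avg {
	unsigned long long last_update_time;	/* 0 == just migrated/forked */
	long load_avg, util_avg;
};

struct demo_rq_avg {
	long load_avg, util_avg;
};

static int demo_enqueue_attach(struct demo_se_avg *se, struct demo_rq_avg *cfs,
			       unsigned long long now)
{
	if (se->last_update_time == 0) {
		/* Newly arrived: adopt the local time base and attach the
		 * carried averages wholesale; no catch-up decay is possible
		 * because the old CPU's clock is not available here. */
		se->last_update_time = now;
		cfs->load_avg += se->load_avg;
		cfs->util_avg += se->util_avg;
		return 1;	/* caller refreshes tg->load_avg as well */
	}
	/* Otherwise the entity only slept locally and is simply brought up
	 * to date by the usual __update_load_avg()-style catch-up. */
	return 0;
}

int main(void)
{
	struct demo_se_avg se = { .last_update_time = 0, .load_avg = 512, .util_avg = 256 };
	struct demo_rq_avg cfs = { .load_avg = 1000, .util_avg = 700 };

	return demo_enqueue_attach(&se, &cfs, 123456ULL) ? 0 : 1;
}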
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Decay the load contributed by all blocked children and account this so that
|
|
|
|
|
* their contribution may be appropriately discounted when they wake up.
|
|
|
|
|
* Task first catches up with cfs_rq, and then subtracts
|
|
|
|
|
* itself from the cfs_rq (task must be off the queue now).
|
|
|
|
|
*/
|
|
|
|
|
static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, int force_update)
|
|
|
|
|
void remove_entity_load_avg(struct sched_entity *se)
|
|
|
|
|
{
|
|
|
|
|
u64 now = cfs_rq_clock_task(cfs_rq) >> 20;
|
|
|
|
|
u64 decays;
|
|
|
|
|
struct cfs_rq *cfs_rq = cfs_rq_of(se);
|
|
|
|
|
u64 last_update_time;
|
|
|
|
|
|
|
|
|
|
decays = now - cfs_rq->last_decay;
|
|
|
|
|
if (!decays && !force_update)
|
|
|
|
|
return;
|
|
|
|
|
#ifndef CONFIG_64BIT
|
|
|
|
|
u64 last_update_time_copy;
|
|
|
|
|
|
|
|
|
|
if (atomic_long_read(&cfs_rq->removed_load)) {
|
|
|
|
|
unsigned long removed_load;
|
|
|
|
|
removed_load = atomic_long_xchg(&cfs_rq->removed_load, 0);
|
|
|
|
|
subtract_blocked_load_contrib(cfs_rq, removed_load);
|
|
|
|
|
}
|
|
|
|
|
do {
|
|
|
|
|
last_update_time_copy = cfs_rq->load_last_update_time_copy;
|
|
|
|
|
smp_rmb();
|
|
|
|
|
last_update_time = cfs_rq->avg.last_update_time;
|
|
|
|
|
} while (last_update_time != last_update_time_copy);
|
|
|
|
|
#else
|
|
|
|
|
last_update_time = cfs_rq->avg.last_update_time;
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
if (decays) {
|
|
|
|
|
cfs_rq->blocked_load_avg = decay_load(cfs_rq->blocked_load_avg,
|
|
|
|
|
decays);
|
|
|
|
|
atomic64_add(decays, &cfs_rq->decay_counter);
|
|
|
|
|
cfs_rq->last_decay = now;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
__update_cfs_rq_tg_load_contrib(cfs_rq, force_update);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Add the load generated by se into cfs_rq's child load-average */
|
|
|
|
|
static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq,
|
|
|
|
|
struct sched_entity *se,
|
|
|
|
|
int wakeup)
|
|
|
|
|
{
|
|
|
|
|
/*
|
|
|
|
|
* We track migrations using entity decay_count <= 0, on a wake-up
|
|
|
|
|
* migration we use a negative decay count to track the remote decays
|
|
|
|
|
* accumulated while sleeping.
|
|
|
|
|
*
|
|
|
|
|
* Newly forked tasks are enqueued with se->avg.decay_count == 0, they
|
|
|
|
|
* are seen by enqueue_entity_load_avg() as a migration with an already
|
|
|
|
|
* constructed load_avg_contrib.
|
|
|
|
|
*/
|
|
|
|
|
if (unlikely(se->avg.decay_count <= 0)) {
|
|
|
|
|
se->avg.last_runnable_update = rq_clock_task(rq_of(cfs_rq));
|
|
|
|
|
if (se->avg.decay_count) {
|
|
|
|
|
/*
|
|
|
|
|
* In a wake-up migration we have to approximate the
|
|
|
|
|
* time sleeping. This is because we can't synchronize
|
|
|
|
|
* clock_task between the two cpus, and it is not
|
|
|
|
|
* guaranteed to be read-safe. Instead, we can
|
|
|
|
|
* approximate this using our carried decays, which are
|
|
|
|
|
* explicitly atomically readable.
|
|
|
|
|
*/
|
|
|
|
|
se->avg.last_runnable_update -= (-se->avg.decay_count)
|
|
|
|
|
<< 20;
|
|
|
|
|
update_entity_load_avg(se, 0);
|
|
|
|
|
/* Indicate that we're now synchronized and on-rq */
|
|
|
|
|
se->avg.decay_count = 0;
|
|
|
|
|
}
|
|
|
|
|
wakeup = 0;
|
|
|
|
|
} else {
|
|
|
|
|
__synchronize_entity_decay(se);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* migrated tasks did not contribute to our blocked load */
|
|
|
|
|
if (wakeup) {
|
|
|
|
|
subtract_blocked_load_contrib(cfs_rq, se->avg.load_avg_contrib);
|
|
|
|
|
update_entity_load_avg(se, 0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
cfs_rq->runnable_load_avg += se->avg.load_avg_contrib;
|
|
|
|
|
cfs_rq->utilization_load_avg += se->avg.utilization_avg_contrib;
|
|
|
|
|
/* we force update consideration on load-balancer moves */
|
|
|
|
|
update_cfs_rq_blocked_load(cfs_rq, !wakeup);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Remove se's load from this cfs_rq child load-average, if the entity is
|
|
|
|
|
* transitioning to a blocked state we track its projected decay using
|
|
|
|
|
* blocked_load_avg.
|
|
|
|
|
*/
|
|
|
|
|
static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq,
|
|
|
|
|
struct sched_entity *se,
|
|
|
|
|
int sleep)
|
|
|
|
|
{
|
|
|
|
|
update_entity_load_avg(se, 1);
|
|
|
|
|
/* we force update consideration on load-balancer moves */
|
|
|
|
|
update_cfs_rq_blocked_load(cfs_rq, !sleep);
|
|
|
|
|
|
|
|
|
|
cfs_rq->runnable_load_avg -= se->avg.load_avg_contrib;
|
|
|
|
|
cfs_rq->utilization_load_avg -= se->avg.utilization_avg_contrib;
|
|
|
|
|
if (sleep) {
|
|
|
|
|
cfs_rq->blocked_load_avg += se->avg.load_avg_contrib;
|
|
|
|
|
se->avg.decay_count = atomic64_read(&cfs_rq->decay_counter);
|
|
|
|
|
} /* migrations, e.g. sleep=0 leave decay_count == 0 */
|
|
|
|
|
__update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)), &se->avg, 0, 0);
|
|
|
|
|
atomic_long_add(se->avg.load_avg, &cfs_rq->removed_load_avg);
|
|
|
|
|
atomic_long_add(se->avg.util_avg, &cfs_rq->removed_util_avg);
|
|
|
|
|
}
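The #ifndef CONFIG_64BIT retry loop above pairs with the smp_wmb() and load_last_update_time_copy store added to update_cfs_rq_load_avg(): a u64 store can tear on 32-bit targets, so the locked writer publishes a copy after a write barrier and the lockless reader spins until the two values agree, a miniature seqcount. A userspace analogue of the ordering using C11 atomics -- only a sketch; in strictly portable code the primary field would also need to be atomic, this merely mirrors the kernel's barrier placement:

#include <stdatomic.h>
#include <stdint.h>

struct demo_clock {
	uint64_t last_update_time;		/* may tear on a 32-bit store */
	_Atomic uint64_t last_update_time_copy;	/* published afterwards */
};

/* Writer side -- runs under the equivalent of rq->lock. */
static void demo_publish(struct demo_clock *c, uint64_t now)
{
	c->last_update_time = now;
	/* release ordering stands in for the kernel's smp_wmb() */
	atomic_store_explicit(&c->last_update_time_copy, now,
			      memory_order_release);
}

/* Lockless reader: retry until the copy and the value agree. */
static uint64_t demo_read(struct demo_clock *c)
{
	uint64_t copy, val;

	do {
		copy = atomic_load_explicit(&c->last_update_time_copy,
					    memory_order_acquire);
		val = c->last_update_time;
	} while (val != copy);

	return val;
}

int main(void)
{
	struct demo_clock c = { 0 };

	demo_publish(&c, 42);
	return demo_read(&c) == 42 ? 0 : 1;
}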
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
@@ -2948,16 +2767,10 @@ static int idle_balance(struct rq *this_rq);
|
|
|
|
|
|
|
|
|
|
#else /* CONFIG_SMP */
|
|
|
|
|
|
|
|
|
|
static inline void update_entity_load_avg(struct sched_entity *se,
|
|
|
|
|
int update_cfs_rq) {}
|
|
|
|
|
static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq,
|
|
|
|
|
struct sched_entity *se,
|
|
|
|
|
int wakeup) {}
|
|
|
|
|
static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq,
|
|
|
|
|
struct sched_entity *se,
|
|
|
|
|
int sleep) {}
|
|
|
|
|
static inline void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq,
|
|
|
|
|
int force_update) {}
|
|
|
|
|
static inline void update_load_avg(struct sched_entity *se, int update_tg) {}
|
|
|
|
|
static inline void
|
|
|
|
|
enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
|
|
|
|
|
static inline void remove_entity_load_avg(struct sched_entity *se) {}
|
|
|
|
|
|
|
|
|
|
static inline int idle_balance(struct rq *rq)
|
|
|
|
|
{
|
|
|
|
@@ -3089,7 +2902,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
|
|
|
|
* Update run-time statistics of the 'current'.
|
|
|
|
|
*/
|
|
|
|
|
update_curr(cfs_rq);
|
|
|
|
|
enqueue_entity_load_avg(cfs_rq, se, flags & ENQUEUE_WAKEUP);
|
|
|
|
|
enqueue_entity_load_avg(cfs_rq, se);
|
|
|
|
|
account_entity_enqueue(cfs_rq, se);
|
|
|
|
|
update_cfs_shares(cfs_rq);
|
|
|
|
|
|
|
|
|
@@ -3164,7 +2977,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
|
|
|
|
* Update run-time statistics of the 'current'.
|
|
|
|
|
*/
|
|
|
|
|
update_curr(cfs_rq);
|
|
|
|
|
dequeue_entity_load_avg(cfs_rq, se, flags & DEQUEUE_SLEEP);
|
|
|
|
|
update_load_avg(se, 1);
|
|
|
|
|
|
|
|
|
|
update_stats_dequeue(cfs_rq, se);
|
|
|
|
|
if (flags & DEQUEUE_SLEEP) {
|
|
|
|
@@ -3254,7 +3067,7 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
|
|
|
|
*/
|
|
|
|
|
update_stats_wait_end(cfs_rq, se);
|
|
|
|
|
__dequeue_entity(cfs_rq, se);
|
|
|
|
|
update_entity_load_avg(se, 1);
|
|
|
|
|
update_load_avg(se, 1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
update_stats_curr_start(cfs_rq, se);
|
|
|
|
@@ -3354,7 +3167,7 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
|
|
|
|
|
/* Put 'current' back into the tree. */
|
|
|
|
|
__enqueue_entity(cfs_rq, prev);
|
|
|
|
|
/* in !on_rq case, update occurred at dequeue */
|
|
|
|
|
update_entity_load_avg(prev, 1);
|
|
|
|
|
update_load_avg(prev, 0);
|
|
|
|
|
}
|
|
|
|
|
cfs_rq->curr = NULL;
|
|
|
|
|
}
|
|
|
|
@@ -3370,8 +3183,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
|
|
|
|
|
/*
|
|
|
|
|
* Ensure that runnable average is periodically updated.
|
|
|
|
|
*/
|
|
|
|
|
update_entity_load_avg(curr, 1);
|
|
|
|
|
update_cfs_rq_blocked_load(cfs_rq, 1);
|
|
|
|
|
update_load_avg(curr, 1);
|
|
|
|
|
update_cfs_shares(cfs_rq);
|
|
|
|
|
|
|
|
|
|
#ifdef CONFIG_SCHED_HRTICK
|
|
|
|
@@ -4244,8 +4056,8 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
|
|
|
|
|
if (cfs_rq_throttled(cfs_rq))
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
update_load_avg(se, 1);
|
|
|
|
|
update_cfs_shares(cfs_rq);
|
|
|
|
|
update_entity_load_avg(se, 1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!se)
|
|
|
|
@@ -4304,8 +4116,8 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
|
|
|
|
|
if (cfs_rq_throttled(cfs_rq))
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
update_load_avg(se, 1);
|
|
|
|
|
update_cfs_shares(cfs_rq);
|
|
|
|
|
update_entity_load_avg(se, 1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!se)
|
|
|
|
@@ -4444,7 +4256,7 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
|
|
|
|
|
static void update_idle_cpu_load(struct rq *this_rq)
|
|
|
|
|
{
|
|
|
|
|
unsigned long curr_jiffies = READ_ONCE(jiffies);
|
|
|
|
|
unsigned long load = this_rq->cfs.runnable_load_avg;
|
|
|
|
|
unsigned long load = this_rq->cfs.avg.load_avg;
|
|
|
|
|
unsigned long pending_updates;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
@@ -4490,7 +4302,7 @@ void update_cpu_load_nohz(void)
|
|
|
|
|
*/
|
|
|
|
|
void update_cpu_load_active(struct rq *this_rq)
|
|
|
|
|
{
|
|
|
|
|
unsigned long load = this_rq->cfs.runnable_load_avg;
|
|
|
|
|
unsigned long load = this_rq->cfs.avg.load_avg;
|
|
|
|
|
/*
|
|
|
|
|
* See the mess around update_idle_cpu_load() / update_cpu_load_nohz().
|
|
|
|
|
*/
|
|
|
|
@@ -4501,7 +4313,7 @@ void update_cpu_load_active(struct rq *this_rq)
|
|
|
|
|
/* Used instead of source_load when we know the type == 0 */
|
|
|
|
|
static unsigned long weighted_cpuload(const int cpu)
|
|
|
|
|
{
|
|
|
|
|
return cpu_rq(cpu)->cfs.runnable_load_avg;
|
|
|
|
|
return cpu_rq(cpu)->cfs.avg.load_avg;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
@@ -4551,7 +4363,7 @@ static unsigned long cpu_avg_load_per_task(int cpu)
|
|
|
|
|
{
|
|
|
|
|
struct rq *rq = cpu_rq(cpu);
|
|
|
|
|
unsigned long nr_running = READ_ONCE(rq->cfs.h_nr_running);
|
|
|
|
|
unsigned long load_avg = rq->cfs.runnable_load_avg;
|
|
|
|
|
unsigned long load_avg = rq->cfs.avg.load_avg;
|
|
|
|
|
|
|
|
|
|
if (nr_running)
|
|
|
|
|
return load_avg / nr_running;
|
|
|
|
@@ -4670,7 +4482,7 @@ static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
|
|
|
|
|
/*
|
|
|
|
|
* w = rw_i + @wl
|
|
|
|
|
*/
|
|
|
|
|
w = se->my_q->load.weight + wl;
|
|
|
|
|
w = se->my_q->avg.load_avg + wl;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* wl = S * s'_i; see (2)
|
|
|
|
@@ -4691,7 +4503,7 @@ static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
|
|
|
|
|
/*
|
|
|
|
|
* wl = dw_i = S * (s'_i - s_i); see (3)
|
|
|
|
|
*/
|
|
|
|
|
wl -= se->load.weight;
|
|
|
|
|
wl -= se->avg.load_avg;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Recursively apply this logic to all parent groups to compute
|
|
|
|
@@ -4761,14 +4573,14 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
|
|
|
|
|
*/
|
|
|
|
|
if (sync) {
|
|
|
|
|
tg = task_group(current);
|
|
|
|
|
weight = current->se.load.weight;
|
|
|
|
|
weight = current->se.avg.load_avg;
|
|
|
|
|
|
|
|
|
|
this_load += effective_load(tg, this_cpu, -weight, -weight);
|
|
|
|
|
load += effective_load(tg, prev_cpu, 0, -weight);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
tg = task_group(p);
|
|
|
|
|
weight = p->se.load.weight;
|
|
|
|
|
weight = p->se.avg.load_avg;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* In low-load situations, where prev_cpu is idle and this_cpu is idle
|
|
|
|
@@ -4961,12 +4773,12 @@ done:
|
|
|
|
|
* tasks. The unit of the return value must be the one of capacity so we can
|
|
|
|
|
* compare the usage with the capacity of the CPU that is available for CFS
|
|
|
|
|
* task (ie cpu_capacity).
|
|
|
|
|
* cfs.utilization_load_avg is the sum of running time of runnable tasks on a
|
|
|
|
|
* cfs.avg.util_avg is the sum of running time of runnable tasks on a
|
|
|
|
|
* CPU. It represents the amount of utilization of a CPU in the range
|
|
|
|
|
* [0..SCHED_LOAD_SCALE]. The usage of a CPU can't be higher than the full
|
|
|
|
|
* capacity of the CPU because it's about the running time on this CPU.
|
|
|
|
|
* Nevertheless, cfs.utilization_load_avg can be higher than SCHED_LOAD_SCALE
|
|
|
|
|
* because of unfortunate rounding in avg_period and running_load_avg or just
|
|
|
|
|
* Nevertheless, cfs.avg.util_avg can be higher than SCHED_LOAD_SCALE
|
|
|
|
|
* because of unfortunate rounding in util_avg or just
|
|
|
|
|
* after migrating tasks until the average stabilizes with the new running
|
|
|
|
|
* time. So we need to check that the usage stays into the range
|
|
|
|
|
* [0..cpu_capacity_orig] and cap if necessary.
|
|
|
|
@@ -4975,7 +4787,7 @@ done:
|
|
|
|
|
*/
|
|
|
|
|
static int get_cpu_usage(int cpu)
|
|
|
|
|
{
|
|
|
|
|
unsigned long usage = cpu_rq(cpu)->cfs.utilization_load_avg;
|
|
|
|
|
unsigned long usage = cpu_rq(cpu)->cfs.avg.util_avg;
|
|
|
|
|
unsigned long capacity = capacity_orig_of(cpu);
|
|
|
|
|
|
|
|
|
|
if (usage >= SCHED_LOAD_SCALE)
|
|
|
|
@@ -5084,26 +4896,22 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
|
|
|
|
|
* previous cpu. However, the caller only guarantees p->pi_lock is held; no
|
|
|
|
|
* other assumptions, including the state of rq->lock, should be made.
|
|
|
|
|
*/
|
|
|
|
|
static void
|
|
|
|
|
migrate_task_rq_fair(struct task_struct *p, int next_cpu)
|
|
|
|
|
static void migrate_task_rq_fair(struct task_struct *p, int next_cpu)
|
|
|
|
|
{
|
|
|
|
|
struct sched_entity *se = &p->se;
|
|
|
|
|
struct cfs_rq *cfs_rq = cfs_rq_of(se);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Load tracking: accumulate removed load so that it can be processed
|
|
|
|
|
* when we next update owning cfs_rq under rq->lock. Tasks contribute
|
|
|
|
|
* to blocked load iff they have a positive decay-count. It can never
|
|
|
|
|
* be negative here since on-rq tasks have decay-count == 0.
|
|
|
|
|
* We are supposed to update the task to "current" time, then it's up to date
|
|
|
|
|
* and ready to go to new CPU/cfs_rq. But we have difficulty in getting
|
|
|
|
|
* what the current time is, so simply throw away the out-of-date time. This
|
|
|
|
|
* will result in the wakee task being less decayed, but giving the wakee more
|
|
|
|
|
* load is not a bad thing.
|
|
|
|
|
*/
|
|
|
|
|
if (se->avg.decay_count) {
|
|
|
|
|
se->avg.decay_count = -__synchronize_entity_decay(se);
|
|
|
|
|
atomic_long_add(se->avg.load_avg_contrib,
|
|
|
|
|
&cfs_rq->removed_load);
|
|
|
|
|
}
|
|
|
|
|
remove_entity_load_avg(&p->se);
|
|
|
|
|
|
|
|
|
|
/* Tell new CPU we are migrated */
|
|
|
|
|
p->se.avg.last_update_time = 0;
|
|
|
|
|
|
|
|
|
|
/* We have migrated, no longer consider this task hot */
|
|
|
|
|
se->exec_start = 0;
|
|
|
|
|
p->se.exec_start = 0;
|
|
|
|
|
}
|
|
|
|
|
#endif /* CONFIG_SMP */
|
|
|
|
|
|
|
|
|
@@ -5966,36 +5774,6 @@ static void attach_tasks(struct lb_env *env)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#ifdef CONFIG_FAIR_GROUP_SCHED
|
|
|
|
|
/*
|
|
|
|
|
* update tg->load_weight by folding this cpu's load_avg
|
|
|
|
|
*/
|
|
|
|
|
static void __update_blocked_averages_cpu(struct task_group *tg, int cpu)
|
|
|
|
|
{
|
|
|
|
|
struct sched_entity *se = tg->se[cpu];
|
|
|
|
|
struct cfs_rq *cfs_rq = tg->cfs_rq[cpu];
|
|
|
|
|
|
|
|
|
|
/* throttled entities do not contribute to load */
|
|
|
|
|
if (throttled_hierarchy(cfs_rq))
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
update_cfs_rq_blocked_load(cfs_rq, 1);
|
|
|
|
|
|
|
|
|
|
if (se) {
|
|
|
|
|
update_entity_load_avg(se, 1);
|
|
|
|
|
/*
|
|
|
|
|
* We pivot on our runnable average having decayed to zero for
|
|
|
|
|
* list removal. This generally implies that all our children
|
|
|
|
|
* have also been removed (modulo rounding error or bandwidth
|
|
|
|
|
* control); however, such cases are rare and we can fix these
|
|
|
|
|
* at enqueue.
|
|
|
|
|
*
|
|
|
|
|
* TODO: fix up out-of-order children on enqueue.
|
|
|
|
|
*/
|
|
|
|
|
if (!se->avg.runnable_avg_sum && !cfs_rq->nr_running)
|
|
|
|
|
list_del_leaf_cfs_rq(cfs_rq);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void update_blocked_averages(int cpu)
|
|
|
|
|
{
|
|
|
|
|
struct rq *rq = cpu_rq(cpu);
|
|
|
|
@@ -6004,19 +5782,19 @@ static void update_blocked_averages(int cpu)
|
|
|
|
|
|
|
|
|
|
raw_spin_lock_irqsave(&rq->lock, flags);
|
|
|
|
|
update_rq_clock(rq);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Iterates the task_group tree in a bottom up fashion, see
|
|
|
|
|
* list_add_leaf_cfs_rq() for details.
|
|
|
|
|
*/
|
|
|
|
|
for_each_leaf_cfs_rq(rq, cfs_rq) {
|
|
|
|
|
/*
|
|
|
|
|
* Note: We may want to consider periodically releasing
|
|
|
|
|
* rq->lock about these updates so that creating many task
|
|
|
|
|
* groups does not result in continually extending hold time.
|
|
|
|
|
*/
|
|
|
|
|
__update_blocked_averages_cpu(cfs_rq->tg, rq->cpu);
|
|
|
|
|
}
|
|
|
|
|
/* throttled entities do not contribute to load */
|
|
|
|
|
if (throttled_hierarchy(cfs_rq))
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq))
|
|
|
|
|
update_tg_load_avg(cfs_rq, 0);
|
|
|
|
|
}
|
|
|
|
|
raw_spin_unlock_irqrestore(&rq->lock, flags);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@@ -6044,14 +5822,13 @@ static void update_cfs_rq_h_load(struct cfs_rq *cfs_rq)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!se) {
|
|
|
|
|
cfs_rq->h_load = cfs_rq->runnable_load_avg;
|
|
|
|
|
cfs_rq->h_load = cfs_rq->avg.load_avg;
|
|
|
|
|
cfs_rq->last_h_load_update = now;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
while ((se = cfs_rq->h_load_next) != NULL) {
|
|
|
|
|
load = cfs_rq->h_load;
|
|
|
|
|
load = div64_ul(load * se->avg.load_avg_contrib,
|
|
|
|
|
cfs_rq->runnable_load_avg + 1);
|
|
|
|
|
load = div64_ul(load * se->avg.load_avg, cfs_rq->avg.load_avg + 1);
|
|
|
|
|
cfs_rq = group_cfs_rq(se);
|
|
|
|
|
cfs_rq->h_load = load;
|
|
|
|
|
cfs_rq->last_h_load_update = now;
|
|
|
|
@@ -6063,8 +5840,8 @@ static unsigned long task_h_load(struct task_struct *p)
|
|
|
|
|
struct cfs_rq *cfs_rq = task_cfs_rq(p);
|
|
|
|
|
|
|
|
|
|
update_cfs_rq_h_load(cfs_rq);
|
|
|
|
|
return div64_ul(p->se.avg.load_avg_contrib * cfs_rq->h_load,
|
|
|
|
|
cfs_rq->runnable_load_avg + 1);
|
|
|
|
|
return div64_ul(p->se.avg.load_avg * cfs_rq->h_load,
|
|
|
|
|
cfs_rq->avg.load_avg + 1);
|
|
|
|
|
}
|
|
|
|
|
#else
|
|
|
|
|
static inline void update_blocked_averages(int cpu)
|
|
|
|
@@ -6073,7 +5850,7 @@ static inline void update_blocked_averages(int cpu)
|
|
|
|
|
|
|
|
|
|
static unsigned long task_h_load(struct task_struct *p)
|
|
|
|
|
{
|
|
|
|
|
return p->se.avg.load_avg_contrib;
|
|
|
|
|
return p->se.avg.load_avg;
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
@@ -8071,15 +7848,18 @@ static void switched_from_fair(struct rq *rq, struct task_struct *p)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#ifdef CONFIG_SMP
|
|
|
|
|
/*
|
|
|
|
|
* Remove our load from contribution when we leave sched_fair
|
|
|
|
|
* and ensure we don't carry in an old decay_count if we
|
|
|
|
|
* switch back.
|
|
|
|
|
*/
|
|
|
|
|
if (se->avg.decay_count) {
|
|
|
|
|
__synchronize_entity_decay(se);
|
|
|
|
|
subtract_blocked_load_contrib(cfs_rq, se->avg.load_avg_contrib);
|
|
|
|
|
}
|
|
|
|
|
/* Catch up with the cfs_rq and remove our load when we leave */
|
|
|
|
|
__update_load_avg(cfs_rq->avg.last_update_time, cpu_of(rq), &se->avg,
|
|
|
|
|
se->on_rq * scale_load_down(se->load.weight), cfs_rq->curr == se);
|
|
|
|
|
|
|
|
|
|
cfs_rq->avg.load_avg =
|
|
|
|
|
max_t(long, cfs_rq->avg.load_avg - se->avg.load_avg, 0);
|
|
|
|
|
cfs_rq->avg.load_sum =
|
|
|
|
|
max_t(s64, cfs_rq->avg.load_sum - se->avg.load_sum, 0);
|
|
|
|
|
cfs_rq->avg.util_avg =
|
|
|
|
|
max_t(long, cfs_rq->avg.util_avg - se->avg.util_avg, 0);
|
|
|
|
|
cfs_rq->avg.util_sum =
|
|
|
|
|
max_t(s32, cfs_rq->avg.util_sum - se->avg.util_sum, 0);
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@@ -8136,8 +7916,8 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
|
|
|
|
|
cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
|
|
|
|
|
#endif
|
|
|
|
|
#ifdef CONFIG_SMP
|
|
|
|
|
atomic64_set(&cfs_rq->decay_counter, 1);
|
|
|
|
|
atomic_long_set(&cfs_rq->removed_load, 0);
|
|
|
|
|
atomic_long_set(&cfs_rq->removed_load_avg, 0);
|
|
|
|
|
atomic_long_set(&cfs_rq->removed_util_avg, 0);
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@@ -8182,14 +7962,14 @@ static void task_move_group_fair(struct task_struct *p, int queued)
|
|
|
|
|
if (!queued) {
|
|
|
|
|
cfs_rq = cfs_rq_of(se);
|
|
|
|
|
se->vruntime += cfs_rq->min_vruntime;
|
|
|
|
|
|
|
|
|
|
#ifdef CONFIG_SMP
|
|
|
|
|
/*
|
|
|
|
|
* migrate_task_rq_fair() will have removed our previous
|
|
|
|
|
* contribution, but we must synchronize for ongoing future
|
|
|
|
|
* decay.
|
|
|
|
|
*/
|
|
|
|
|
se->avg.decay_count = atomic64_read(&cfs_rq->decay_counter);
|
|
|
|
|
cfs_rq->blocked_load_avg += se->avg.load_avg_contrib;
|
|
|
|
|
/* Virtually synchronize task with its new cfs_rq */
|
|
|
|
|
p->se.avg.last_update_time = cfs_rq->avg.last_update_time;
|
|
|
|
|
cfs_rq->avg.load_avg += p->se.avg.load_avg;
|
|
|
|
|
cfs_rq->avg.load_sum += p->se.avg.load_sum;
|
|
|
|
|
cfs_rq->avg.util_avg += p->se.avg.util_avg;
|
|
|
|
|
cfs_rq->avg.util_sum += p->se.avg.util_sum;
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
}