Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
 "The main changes in this cycle are:

   - Various NUMA scheduling updates: harmonize the load-balancer and
     NUMA placement logic to not work against each other. The intended
     result is better locality, better utilization and fewer migrations.

   - Introduce Thermal Pressure tracking and optimizations, to improve
     task placement on thermally overloaded systems.

   - Implement frequency invariant scheduler accounting on (some) x86
     CPUs. This is done by observing and sampling the 'recent' CPU
     frequency average at ~tick boundaries. The CPU provides this data
     via the APERF/MPERF MSRs. This hopefully makes our capacity
     estimates more precise and keeps tasks on the same CPU better even
     if it might seem overloaded at a lower momentary frequency. (As
     usual, turbo mode is a complication that we resolve by observing
     the maximum frequency and renormalizing to it.)

   - Add asymmetric CPU capacity wakeup scan to improve capacity
     utilization on asymmetric topologies. (big.LITTLE systems)

   - PSI fixes and optimizations.

   - RT scheduling capacity awareness fixes & improvements.

   - Optimize the CONFIG_RT_GROUP_SCHED constraints code.

   - Misc fixes, cleanups and optimizations - see the changelog for
     details"

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (62 commits)
  threads: Update PID limit comment according to futex UAPI change
  sched/fair: Fix condition of avg_load calculation
  sched/rt: cpupri_find: Trigger a full search as fallback
  kthread: Do not preempt current task if it is going to call schedule()
  sched/fair: Improve spreading of utilization
  sched: Avoid scale real weight down to zero
  psi: Move PF_MEMSTALL out of task->flags
  MAINTAINERS: Add maintenance information for psi
  psi: Optimize switching tasks inside shared cgroups
  psi: Fix cpu.pressure for cpu.max and competing cgroups
  sched/core: Distribute tasks within affinity masks
  sched/fair: Fix enqueue_task_fair warning
  thermal/cpu-cooling, sched/core: Move the arch_set_thermal_pressure() API to generic scheduler code
  sched/rt: Remove unnecessary push for unfit tasks
  sched/rt: Allow pulling unfitting task
  sched/rt: Optimize cpupri_find() on non-heterogenous systems
  sched/rt: Re-instate old behavior in select_task_rq_rt()
  sched/rt: cpupri_find: Implement fallback mechanism for !fit case
  sched/fair: Fix reordering of enqueue/dequeue_task_fair()
  sched/fair: Fix runnable_avg for throttled cfs
  ...
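The frequency-invariance bullet above compresses a fair amount of arithmetic. As a rough user-space sketch of the idea (made-up numbers and a simplified formula, not the kernel's actual implementation, which lives in arch/x86 and must also handle counter wraparound and per-CPU state):

    #include <stdint.h>
    #include <stdio.h>

    #define SCHED_CAPACITY_SCALE 1024ULL

    /*
     * Between two ticks, delta(APERF)/delta(MPERF) gives the average
     * frequency relative to the base frequency; dividing by the maximum
     * (turbo) ratio renormalizes so that "running at max frequency"
     * maps to SCHED_CAPACITY_SCALE.
     */
    static uint64_t freq_scale(uint64_t aperf_delta, uint64_t mperf_delta,
                               uint64_t base_khz, uint64_t max_khz)
    {
            return (aperf_delta * base_khz * SCHED_CAPACITY_SCALE) /
                   (mperf_delta * max_khz);
    }

    int main(void)
    {
            /* CPU averaged 3.0 GHz over a tick; base 2.0 GHz, turbo max 4.0 GHz */
            printf("freq_scale = %llu/1024\n",
                   (unsigned long long)freq_scale(3000, 2000, 2000000, 4000000));
            /* prints 768, i.e. the CPU ran at 75% of its maximum frequency */
            return 0;
    }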
include/linux/sched.h
@@ -356,28 +356,30 @@ struct util_est {
 } __attribute__((__aligned__(sizeof(u64))));
 
 /*
- * The load_avg/util_avg accumulates an infinite geometric series
- * (see __update_load_avg() in kernel/sched/fair.c).
+ * The load/runnable/util_avg accumulates an infinite geometric series
+ * (see __update_load_avg_cfs_rq() in kernel/sched/pelt.c).
  *
  * [load_avg definition]
  *
  *   load_avg = runnable% * scale_load_down(load)
  *
- * where runnable% is the time ratio that a sched_entity is runnable.
- * For cfs_rq, it is the aggregated load_avg of all runnable and
- * blocked sched_entities.
+ * [runnable_avg definition]
+ *
+ *   runnable_avg = runnable% * SCHED_CAPACITY_SCALE
  *
  * [util_avg definition]
  *
  *   util_avg = running% * SCHED_CAPACITY_SCALE
  *
- * where running% is the time ratio that a sched_entity is running on
- * a CPU. For cfs_rq, it is the aggregated util_avg of all runnable
- * and blocked sched_entities.
+ * where runnable% is the time ratio that a sched_entity is runnable and
+ * running% the time ratio that a sched_entity is running.
  *
- * load_avg and util_avg don't direcly factor frequency scaling and CPU
- * capacity scaling. The scaling is done through the rq_clock_pelt that
- * is used for computing those signals (see update_rq_clock_pelt())
+ * For cfs_rq, they are the aggregated values of all runnable and blocked
+ * sched_entities.
+ *
+ * The load/runnable/util_avg don't directly factor frequency scaling and CPU
+ * capacity scaling. The scaling is done through the rq_clock_pelt that is used
+ * for computing those signals (see update_rq_clock_pelt())
  *
  * N.B., the above ratios (runnable% and running%) themselves are in the
  * range of [0, 1]. To do fixed point arithmetics, we therefore scale them
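To make the "infinite geometric series" above concrete, here is a small stand-alone sketch (plain user-space C, compile with -lm; the constants only approximate kernel/sched/pelt.c, which works in integer arithmetic). Each ~1ms period contributes up to 1024 units scaled by how long the entity ran, the whole sum decays by y per period with y^32 = 0.5, and the normalized average converges toward running% * SCHED_CAPACITY_SCALE:

    #include <stdio.h>
    #include <math.h>

    #define SCHED_CAPACITY_SCALE 1024.0

    int main(void)
    {
            double y = pow(0.5, 1.0 / 32.0);     /* decay; half-life = 32 periods */
            double max_sum = 1024.0 / (1.0 - y); /* limit of the geometric series */
            double running = 0.25;               /* entity runs 25% of the time */
            double sum = 0.0;

            for (int period = 1; period <= 320; period++) {
                    sum = sum * y + running * 1024.0;
                    if (period % 64 == 0)
                            printf("period %3d: util_avg ~ %.0f\n",
                                   period, sum / max_sum * SCHED_CAPACITY_SCALE);
            }
            /* converges to ~256, i.e. 25% of SCHED_CAPACITY_SCALE */
            return 0;
    }

The struct sched_avg hunk that follows adds the runnable_sum/runnable_avg pair for the third signal (runnable_avg) that the updated comment documents; it is accumulated the same way.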
@@ -401,11 +403,11 @@ struct util_est {
 struct sched_avg {
 	u64				last_update_time;
 	u64				load_sum;
-	u64				runnable_load_sum;
+	u64				runnable_sum;
 	u32				util_sum;
 	u32				period_contrib;
 	unsigned long			load_avg;
-	unsigned long			runnable_load_avg;
+	unsigned long			runnable_avg;
 	unsigned long			util_avg;
 	struct util_est			util_est;
 } ____cacheline_aligned;
@@ -449,7 +451,6 @@ struct sched_statistics {
 struct sched_entity {
 	/* For load-balancing: */
 	struct load_weight		load;
-	unsigned long			runnable_weight;
 	struct rb_node			run_node;
 	struct list_head		group_node;
 	unsigned int			on_rq;
@@ -470,6 +471,8 @@ struct sched_entity {
 	struct cfs_rq			*cfs_rq;
 	/* rq "owned" by this entity/group: */
 	struct cfs_rq			*my_q;
+	/* cached value of my_q->h_nr_running */
+	unsigned long			runnable_weight;
 #endif
 
 #ifdef CONFIG_SMP
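The two sched_entity hunks above change what runnable_weight means: it used to be a load-like quantity on every entity, and now exists only for group entities, where it caches how many tasks are runnable in the group's own queue. A toy model of that caching (illustrative types and names, not the kernel's helpers):

    #include <stdio.h>

    struct toy_cfs_rq { unsigned int h_nr_running; };

    struct toy_sched_entity {
            struct toy_cfs_rq *my_q;       /* NULL for a plain task */
            unsigned long runnable_weight; /* cache of my_q->h_nr_running */
            unsigned int on_rq;
    };

    /* group entity: cached child count; task entity: 1 if queued */
    static unsigned long toy_se_runnable(const struct toy_sched_entity *se)
    {
            return se->my_q ? se->runnable_weight : !!se->on_rq;
    }

    int main(void)
    {
            struct toy_cfs_rq q = { .h_nr_running = 3 };
            struct toy_sched_entity group = {
                    .my_q = &q, .runnable_weight = q.h_nr_running,
            };

            printf("group runnable weight: %lu\n", toy_se_runnable(&group));
            return 0;
    }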
@@ -782,9 +785,12 @@ struct task_struct {
 	unsigned			frozen:1;
 #endif
 #ifdef CONFIG_BLK_CGROUP
-	/* to be used once the psi infrastructure lands upstream. */
 	unsigned			use_memdelay:1;
 #endif
+#ifdef CONFIG_PSI
+	/* Stalled due to lack of memory */
+	unsigned			in_memstall:1;
+#endif
 
 	unsigned long			atomic_flags; /* Flags requiring atomic access. */
 
@@ -1479,7 +1485,6 @@ extern struct pid *cad_pid;
 #define PF_KTHREAD		0x00200000	/* I am a kernel thread */
 #define PF_RANDOMIZE		0x00400000	/* Randomize virtual address space */
 #define PF_SWAPWRITE		0x00800000	/* Allowed to write to swap */
-#define PF_MEMSTALL		0x01000000	/* Stalled due to lack of memory */
 #define PF_UMH			0x02000000	/* I'm an Usermodehelper process */
 #define PF_NO_SETAFFINITY	0x04000000	/* Userland is not allowed to meddle with cpus_mask */
 #define PF_MCE_EARLY		0x08000000	/* Early kill for mce process policy */
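Taken together, the last two hunks move the memstall state out of the shared task->flags word (freeing the 0x01000000 slot) and into a dedicated one-bit field that exists only under CONFIG_PSI. A stand-alone toy showing the before/after access pattern (not the kernel's code; PSI's real entry points are psi_memstall_enter()/psi_memstall_leave()):

    #include <stdio.h>

    #define TOY_PF_MEMSTALL 0x01000000

    struct toy_task {
            unsigned long flags;    /* old home: shared PF_* word */
            unsigned in_memstall:1; /* new home: dedicated PSI bit */
    };

    int main(void)
    {
            struct toy_task t = { 0 };

            t.flags |= TOY_PF_MEMSTALL;  /* old style: set via mask...   */
            t.flags &= ~TOY_PF_MEMSTALL; /* ...and clear via mask        */

            t.in_memstall = 1;           /* new style: direct assignment */
            printf("in_memstall=%u\n", t.in_memstall);
            return 0;
    }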