Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
 "The main scheduler changes in this cycle were:

   - support Intel Turbo Boost Max Technology 3.0 (TBM3) by introducing
     a notion of 'better cores', which the scheduler will prefer to
     schedule single threaded workloads on. (Tim Chen, Srinivas Pandruvada)

   - enhance the handling of asymmetric capacity CPUs further (Morten Rasmussen)

   - improve/fix load handling when moving tasks between task groups (Vincent Guittot)

   - simplify and clean up the cputime code (Stanislaw Gruszka)

   - improve mass fork()ed task spread a.k.a. hackbench speedup (Vincent Guittot)

   - make struct kthread kmalloc()ed and related fixes (Oleg Nesterov)

   - add uaccess atomicity debugging (when using access_ok() in the wrong
     context), under CONFIG_DEBUG_ATOMIC_SLEEP=y (Peter Zijlstra)

   - implement various fixes, cleanups and other enhancements (Daniel
     Bristot de Oliveira, Martin Schwidefsky, Rafael J. Wysocki)"

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (41 commits)
  sched/core: Use load_avg for selecting idlest group
  sched/core: Fix find_idlest_group() for fork
  kthread: Don't abuse kthread_create_on_cpu() in __kthread_create_worker()
  kthread: Don't use to_live_kthread() in kthread_[un]park()
  kthread: Don't use to_live_kthread() in kthread_stop()
  Revert "kthread: Pin the stack via try_get_task_stack()/put_task_stack() in to_live_kthread() function"
  kthread: Make struct kthread kmalloc'ed
  x86/uaccess, sched/preempt: Verify access_ok() context
  sched/x86: Make CONFIG_SCHED_MC_PRIO=y easier to enable
  sched/x86: Change CONFIG_SCHED_ITMT to CONFIG_SCHED_MC_PRIO
  x86/sched: Use #include <linux/mutex.h> instead of #include <asm/mutex.h>
  cpufreq/intel_pstate: Use CPPC to get max performance
  acpi/bus: Set _OSC for diverse core support
  acpi/bus: Enable HWP CPPC objects
  x86/sched: Add SD_ASYM_PACKING flags to x86 ITMT CPU
  x86/sysctl: Add sysctl for ITMT scheduling feature
  x86: Enable Intel Turbo Boost Max Technology 3.0
  x86/topology: Define x86's arch_update_cpu_topology
  sched: Extend scheduler's asym packing
  sched/fair: Clean up the tunable parameter definitions
  ...
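The first two commits in the shortlog change find_idlest_group() to compare candidate groups by their PELT load_avg rather than instantaneous load, and to special-case freshly forked tasks that have no load history yet. A minimal sketch of just that selection criterion, with invented names and none of the real code's spare-capacity or imbalance handling:

#include <stddef.h>

struct group_stats {
        unsigned long load_avg;   /* mean PELT load over the group's CPUs */
};

/*
 * Toy model of the new find_idlest_group() criterion: prefer the
 * group with the lowest average load. Illustrative only.
 */
static size_t pick_idlest_group(const struct group_stats *g, size_t n)
{
        size_t i, idlest = 0;

        for (i = 1; i < n; i++)
                if (g[i].load_avg < g[idlest].load_avg)
                        idlest = i;
        return idlest;
}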
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
@@ -1995,14 +1995,15 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
  * @state: the mask of task states that can be woken
  * @wake_flags: wake modifier flags (WF_*)
  *
- * Put it on the run-queue if it's not already there. The "current"
- * thread is always on the run-queue (except when the actual
- * re-schedule is in progress), and as such you're allowed to do
- * the simpler "current->state = TASK_RUNNING" to mark yourself
- * runnable without the overhead of this.
+ * If (@state & @p->state) @p->state = TASK_RUNNING.
  *
- * Return: %true if @p was woken up, %false if it was already running.
- * or @state didn't match @p's state.
+ * If the task was not queued/runnable, also place it back on a runqueue.
+ *
+ * Atomic against schedule() which would dequeue a task, also see
+ * set_current_state().
+ *
+ * Return: %true if @p->state changes (an actual wakeup was done),
+ * %false otherwise.
  */
 static int
 try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
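The rewritten kerneldoc describes try_to_wake_up() purely as a state transition that is atomic against schedule(). For context, the canonical sleep/wakeup pairing it is atomic against looks roughly like this; a minimal sketch, where my_dev and its condition flag are hypothetical driver state:

/* Waiter: the set_current_state()/condition-check/schedule() idiom. */
static int wait_for_condition(struct my_dev *dev)
{
        for (;;) {
                set_current_state(TASK_INTERRUPTIBLE);
                if (dev->condition)     /* re-checked after setting state */
                        break;
                schedule();             /* sleep until a wakeup */
        }
        __set_current_state(TASK_RUNNING);
        return 0;
}

/* Waker: publish the condition, then wake; try_to_wake_up() (via
 * wake_up_process()) resolves the race against the waiter's state write. */
static void signal_condition(struct my_dev *dev, struct task_struct *waiter)
{
        dev->condition = true;
        wake_up_process(waiter);
}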
@@ -5707,7 +5708,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 	printk(KERN_CONT " %*pbl",
 	       cpumask_pr_args(sched_group_cpus(group)));
 	if (group->sgc->capacity != SCHED_CAPACITY_SCALE) {
-		printk(KERN_CONT " (cpu_capacity = %d)",
+		printk(KERN_CONT " (cpu_capacity = %lu)",
 			group->sgc->capacity);
 	}
@@ -6184,6 +6185,7 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 		 * die on a /0 trap.
 		 */
 		sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
+		sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
 
 		/*
 		 * Make sure the first group of this domain contains the
@@ -6301,7 +6303,22 @@ static void init_sched_groups_capacity(int cpu, struct sched_domain *sd)
 	WARN_ON(!sg);
 
 	do {
+		int cpu, max_cpu = -1;
+
 		sg->group_weight = cpumask_weight(sched_group_cpus(sg));
+
+		if (!(sd->flags & SD_ASYM_PACKING))
+			goto next;
+
+		for_each_cpu(cpu, sched_group_cpus(sg)) {
+			if (max_cpu < 0)
+				max_cpu = cpu;
+			else if (sched_asym_prefer(cpu, max_cpu))
+				max_cpu = cpu;
+		}
+		sg->asym_prefer_cpu = max_cpu;
+
+next:
 		sg = sg->next;
 	} while (sg != sd->groups);
 
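The new loop caches, per sched_group, the CPU whose arch-defined priority is highest, so asym packing can later steer work toward it without rescanning. The same argmax scan as a self-contained userspace model (the priority table stands in for arch_asym_cpu_priority(); the values are invented):

#include <stdio.h>

static const int cpu_priority[] = { 10, 30, 20, 30 };

/*
 * Highest priority wins; ties keep the first (lowest-numbered) CPU,
 * because the comparison, like sched_asym_prefer(), is strictly greater.
 */
static int pick_asym_prefer_cpu(const int *cpus, int n)
{
        int i, max_cpu = -1;

        for (i = 0; i < n; i++) {
                int cpu = cpus[i];

                if (max_cpu < 0 || cpu_priority[cpu] > cpu_priority[max_cpu])
                        max_cpu = cpu;
        }
        return max_cpu;
}

int main(void)
{
        int group[] = { 0, 1, 2, 3 };

        printf("asym_prefer_cpu = %d\n", pick_asym_prefer_cpu(group, 4)); /* prints 1 */
        return 0;
}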
@@ -7602,6 +7619,7 @@ void __init sched_init(void)
 #ifdef CONFIG_FAIR_GROUP_SCHED
 		root_task_group.shares = ROOT_TASK_GROUP_LOAD;
 		INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
+		rq->tmp_alone_branch = &rq->leaf_cfs_rq_list;
 		/*
 		 * How much cpu bandwidth does root_task_group get?
 		 *
diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
@@ -297,7 +297,7 @@ static int cpuacct_stats_show(struct seq_file *sf, void *v)
 	for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) {
 		seq_printf(sf, "%s %lld\n",
 			   cpuacct_stat_desc[stat],
-			   cputime64_to_clock_t(val[stat]));
+			   (long long)cputime64_to_clock_t(val[stat]));
 	}
 
 	return 0;
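The added cast matters because the %lld conversion expects a long long argument, while the value being printed is a 64-bit kernel type whose underlying C type varies by architecture; passing a mismatched type through varargs is undefined behavior. The same pitfall in portable C:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t val = 123456789ULL;

        /*
         * printf("%lld\n", val) would be undefined behavior wherever
         * uint64_t is not long long (e.g. unsigned long on LP64).
         */
        printf("%lld\n", (long long)val);   /* cast makes the types agree */
        return 0;
}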
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
@@ -128,16 +128,13 @@ static inline void task_group_account_field(struct task_struct *p, int index,
  * Account user cpu time to a process.
  * @p: the process that the cpu time gets accounted to
  * @cputime: the cpu time spent in user space since the last update
- * @cputime_scaled: cputime scaled by cpu frequency
  */
-void account_user_time(struct task_struct *p, cputime_t cputime,
-		       cputime_t cputime_scaled)
+void account_user_time(struct task_struct *p, cputime_t cputime)
 {
 	int index;
 
 	/* Add user time to process. */
 	p->utime += cputime;
-	p->utimescaled += cputime_scaled;
 	account_group_user_time(p, cputime);
 
 	index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;
@@ -153,16 +150,13 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
  * Account guest cpu time to a process.
  * @p: the process that the cpu time gets accounted to
  * @cputime: the cpu time spent in virtual machine since the last update
- * @cputime_scaled: cputime scaled by cpu frequency
  */
-static void account_guest_time(struct task_struct *p, cputime_t cputime,
-			       cputime_t cputime_scaled)
+static void account_guest_time(struct task_struct *p, cputime_t cputime)
 {
 	u64 *cpustat = kcpustat_this_cpu->cpustat;
 
 	/* Add guest time to process. */
 	p->utime += cputime;
-	p->utimescaled += cputime_scaled;
 	account_group_user_time(p, cputime);
 	p->gtime += cputime;
 
@@ -180,16 +174,13 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime,
  * Account system cpu time to a process and desired cpustat field
  * @p: the process that the cpu time gets accounted to
  * @cputime: the cpu time spent in kernel space since the last update
- * @cputime_scaled: cputime scaled by cpu frequency
- * @target_cputime64: pointer to cpustat field that has to be updated
+ * @index: pointer to cpustat field that has to be updated
  */
 static inline
-void __account_system_time(struct task_struct *p, cputime_t cputime,
-			cputime_t cputime_scaled, int index)
+void __account_system_time(struct task_struct *p, cputime_t cputime, int index)
 {
 	/* Add system time to process. */
 	p->stime += cputime;
-	p->stimescaled += cputime_scaled;
 	account_group_system_time(p, cputime);
 
 	/* Add system time to cpustat. */
@@ -204,15 +195,14 @@ void __account_system_time(struct task_struct *p, cputime_t cputime,
  * @p: the process that the cpu time gets accounted to
  * @hardirq_offset: the offset to subtract from hardirq_count()
  * @cputime: the cpu time spent in kernel space since the last update
- * @cputime_scaled: cputime scaled by cpu frequency
  */
 void account_system_time(struct task_struct *p, int hardirq_offset,
-			 cputime_t cputime, cputime_t cputime_scaled)
+			 cputime_t cputime)
 {
 	int index;
 
 	if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
-		account_guest_time(p, cputime, cputime_scaled);
+		account_guest_time(p, cputime);
 		return;
 	}
 
@@ -223,7 +213,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
 	else
 		index = CPUTIME_SYSTEM;
 
-	__account_system_time(p, cputime, cputime_scaled, index);
+	__account_system_time(p, cputime, index);
 }
 
 /*
@@ -390,7 +380,7 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
 					 struct rq *rq, int ticks)
 {
 	u64 cputime = (__force u64) cputime_one_jiffy * ticks;
-	cputime_t scaled, other;
+	cputime_t other;
 
 	/*
 	 * When returning from idle, many ticks can get accounted at
@@ -403,7 +393,6 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
 	if (other >= cputime)
 		return;
 	cputime -= other;
-	scaled = cputime_to_scaled(cputime);
 
 	if (this_cpu_ksoftirqd() == p) {
 		/*
@@ -411,15 +400,15 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
 		 * So, we have to handle it separately here.
 		 * Also, p->stime needs to be updated for ksoftirqd.
 		 */
-		__account_system_time(p, cputime, scaled, CPUTIME_SOFTIRQ);
+		__account_system_time(p, cputime, CPUTIME_SOFTIRQ);
 	} else if (user_tick) {
-		account_user_time(p, cputime, scaled);
+		account_user_time(p, cputime);
 	} else if (p == rq->idle) {
 		account_idle_time(cputime);
 	} else if (p->flags & PF_VCPU) { /* System time or guest time */
-		account_guest_time(p, cputime, scaled);
+		account_guest_time(p, cputime);
 	} else {
-		__account_system_time(p, cputime, scaled, CPUTIME_SYSTEM);
+		__account_system_time(p, cputime, CPUTIME_SYSTEM);
 	}
 }
 
@@ -502,7 +491,7 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
  */
 void account_process_tick(struct task_struct *p, int user_tick)
 {
-	cputime_t cputime, scaled, steal;
+	cputime_t cputime, steal;
 	struct rq *rq = this_rq();
 
 	if (vtime_accounting_cpu_enabled())
@@ -520,12 +509,11 @@ void account_process_tick(struct task_struct *p, int user_tick)
 		return;
 
 	cputime -= steal;
-	scaled = cputime_to_scaled(cputime);
 
 	if (user_tick)
-		account_user_time(p, cputime, scaled);
+		account_user_time(p, cputime);
 	else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
-		account_system_time(p, HARDIRQ_OFFSET, cputime, scaled);
+		account_system_time(p, HARDIRQ_OFFSET, cputime);
 	else
 		account_idle_time(cputime);
 }
@@ -746,7 +734,7 @@ static void __vtime_account_system(struct task_struct *tsk)
 {
 	cputime_t delta_cpu = get_vtime_delta(tsk);
 
-	account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu));
+	account_system_time(tsk, irq_count(), delta_cpu);
 }
 
 void vtime_account_system(struct task_struct *tsk)
@@ -767,7 +755,7 @@ void vtime_account_user(struct task_struct *tsk)
 	tsk->vtime_snap_whence = VTIME_SYS;
 	if (vtime_delta(tsk)) {
 		delta_cpu = get_vtime_delta(tsk);
-		account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
+		account_user_time(tsk, delta_cpu);
 	}
 	write_seqcount_end(&tsk->vtime_seqcount);
 }
@@ -863,29 +851,25 @@ cputime_t task_gtime(struct task_struct *t)
  * add up the pending nohz execution time since the last
  * cputime snapshot.
  */
-static void
-fetch_task_cputime(struct task_struct *t,
-		   cputime_t *u_dst, cputime_t *s_dst,
-		   cputime_t *u_src, cputime_t *s_src,
-		   cputime_t *udelta, cputime_t *sdelta)
+void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime)
 {
+	cputime_t delta;
 	unsigned int seq;
-	unsigned long long delta;
+
+	if (!vtime_accounting_enabled()) {
+		*utime = t->utime;
+		*stime = t->stime;
+		return;
+	}
 
 	do {
-		*udelta = 0;
-		*sdelta = 0;
-
 		seq = read_seqcount_begin(&t->vtime_seqcount);
 
-		if (u_dst)
-			*u_dst = *u_src;
-		if (s_dst)
-			*s_dst = *s_src;
+		*utime = t->utime;
+		*stime = t->stime;
 
 		/* Task is sleeping, nothing to add */
-		if (t->vtime_snap_whence == VTIME_INACTIVE ||
-		    is_idle_task(t))
+		if (t->vtime_snap_whence == VTIME_INACTIVE || is_idle_task(t))
 			continue;
 
 		delta = vtime_delta(t);
@@ -894,54 +878,10 @@ fetch_task_cputime(struct task_struct *t,
 		 * Task runs either in user or kernel space, add pending nohz time to
 		 * the right place.
 		 */
-		if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU) {
-			*udelta = delta;
-		} else {
-			if (t->vtime_snap_whence == VTIME_SYS)
-				*sdelta = delta;
-		}
+		if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU)
+			*utime += delta;
+		else if (t->vtime_snap_whence == VTIME_SYS)
+			*stime += delta;
 	} while (read_seqcount_retry(&t->vtime_seqcount, seq));
 }
 
-
-void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime)
-{
-	cputime_t udelta, sdelta;
-
-	if (!vtime_accounting_enabled()) {
-		if (utime)
-			*utime = t->utime;
-		if (stime)
-			*stime = t->stime;
-		return;
-	}
-
-	fetch_task_cputime(t, utime, stime, &t->utime,
-			   &t->stime, &udelta, &sdelta);
-	if (utime)
-		*utime += udelta;
-	if (stime)
-		*stime += sdelta;
-}
-
-void task_cputime_scaled(struct task_struct *t,
-			 cputime_t *utimescaled, cputime_t *stimescaled)
-{
-	cputime_t udelta, sdelta;
-
-	if (!vtime_accounting_enabled()) {
-		if (utimescaled)
-			*utimescaled = t->utimescaled;
-		if (stimescaled)
-			*stimescaled = t->stimescaled;
-		return;
-	}
-
-	fetch_task_cputime(t, utimescaled, stimescaled,
-			   &t->utimescaled, &t->stimescaled, &udelta, &sdelta);
-	if (utimescaled)
-		*utimescaled += cputime_to_scaled(udelta);
-	if (stimescaled)
-		*stimescaled += cputime_to_scaled(sdelta);
-}
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
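task_cputime() now folds pending nohz deltas into the output under a single seqcount read loop, retrying whenever a writer moves the counters mid-read. A toy userspace model of that read-side pattern (the seqcount here is a plain C11 atomic, not the kernel's seqcount_t):

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint seq;                 /* even = stable, odd = write in progress */
static unsigned long utime, stime;

static void writer_tick(void)
{
        atomic_fetch_add(&seq, 1);      /* go odd: snapshot now unstable */
        utime += 10;
        stime += 3;
        atomic_fetch_add(&seq, 1);      /* go even: stable again */
}

static void read_times(unsigned long *u, unsigned long *s)
{
        unsigned int start;

        do {
                do {                    /* wait for an even (stable) sequence */
                        start = atomic_load(&seq);
                } while (start & 1);

                *u = utime;
                *s = stime;
        } while (atomic_load(&seq) != start);   /* retry if a writer ran */
}

int main(void)
{
        unsigned long u, s;

        writer_tick();
        read_times(&u, &s);
        printf("utime=%lu stime=%lu\n", u, s);
        return 0;
}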
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
@@ -586,7 +586,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
 
 	/*
 	 * The task might have changed its scheduling policy to something
-	 * different than SCHED_DEADLINE (through switched_fromd_dl()).
+	 * different than SCHED_DEADLINE (through switched_from_dl()).
 	 */
 	if (!dl_task(p)) {
 		__dl_clear_params(p);
@@ -1137,7 +1137,7 @@ pick_next_task_dl(struct rq *rq, struct task_struct *prev, struct pin_cookie cookie)
 		pull_dl_task(rq);
 		lockdep_repin_lock(&rq->lock, cookie);
 		/*
-		 * pull_rt_task() can drop (and re-acquire) rq->lock; this
+		 * pull_dl_task() can drop (and re-acquire) rq->lock; this
 		 * means a stop task can slip in, in which case we need to
 		 * re-start task selection.
 		 */
(File diff suppressed because it is too large.)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
@@ -404,6 +404,7 @@ struct cfs_rq {
 	unsigned long runnable_load_avg;
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	unsigned long tg_load_avg_contrib;
+	unsigned long propagate_avg;
 #endif
 	atomic_long_t removed_load_avg, removed_util_avg;
 #ifndef CONFIG_64BIT
@@ -539,6 +540,11 @@ struct dl_rq {
 
 #ifdef CONFIG_SMP
 
+static inline bool sched_asym_prefer(int a, int b)
+{
+	return arch_asym_cpu_priority(a) > arch_asym_cpu_priority(b);
+}
+
 /*
  * We add the notion of a root-domain which will be used to define per-domain
  * variables. Each exclusive cpuset essentially defines an island domain by
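sched_asym_prefer() is the single point where the scheduler consults CPU priority, which keeps the ITMT policy itself in architecture code. Sketched from memory of this same series (so treat the details as an assumption rather than the exact upstream code): the generic kernel supplies a weak default that prefers lower-numbered CPUs, and x86 overrides it with a per-CPU priority derived from CPPC performance data:

/* Generic weak default: lower CPU number = higher priority. */
int __weak arch_asym_cpu_priority(int cpu)
{
        return -cpu;
}

/* x86 ITMT override (arch/x86/kernel/itmt.c, sketched): the per-CPU
 * value is populated from the CPPC highest-performance figures. */
DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority);

int arch_asym_cpu_priority(int cpu)
{
        return per_cpu(sched_core_priority, cpu);
}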
@@ -623,6 +629,7 @@ struct rq {
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/* list of leaf cfs_rq on this cpu: */
 	struct list_head leaf_cfs_rq_list;
+	struct list_head *tmp_alone_branch;
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
 	/*
@@ -892,7 +899,8 @@ struct sched_group_capacity {
 	 * CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity
 	 * for a single CPU.
 	 */
-	unsigned int capacity;
+	unsigned long capacity;
+	unsigned long min_capacity; /* Min per-CPU capacity in group */
 	unsigned long next_update;
 	int imbalance; /* XXX unrelated to capacity but shared group state */
 
@@ -905,6 +913,7 @@ struct sched_group {
 
 	unsigned int group_weight;
 	struct sched_group_capacity *sgc;
+	int asym_prefer_cpu;	/* cpu of highest priority in group */
 
 	/*
 	 * The CPUs this group covers.