Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
 "The main changes in this cycle were:

   - another round of rq-clock handling debugging, robustization and fixes

   - PELT accounting improvements

   - CPU hotplug related ->cpus_allowed affinity handling fixes all around the tree

   - ... plus misc fixes, cleanups and updates"

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (35 commits)
  sched/x86: Update reschedule warning text
  crypto: N2 - Replace racy task affinity logic
  cpufreq/sparc-us2e: Replace racy task affinity logic
  cpufreq/sparc-us3: Replace racy task affinity logic
  cpufreq/sh: Replace racy task affinity logic
  cpufreq/ia64: Replace racy task affinity logic
  ACPI/processor: Replace racy task affinity logic
  ACPI/processor: Fix error handling in __acpi_processor_start()
  sparc/sysfs: Replace racy task affinity logic
  powerpc/smp: Replace open coded task affinity logic
  ia64/sn/hwperf: Replace racy task affinity logic
  ia64/salinfo: Replace racy task affinity logic
  workqueue: Provide work_on_cpu_safe()
  ia64/topology: Remove cpus_allowed manipulation
  sched/fair: Move the PELT constants into a generated header
  sched/fair: Increase PELT accuracy for small tasks
  sched/fair: Fix comments
  sched/Documentation: Add 'sched-pelt' tool
  sched/fair: Fix corner case in __accumulate_sum()
  sched/core: Remove 'task' parameter and rename tsk_restore_flags() to current_restore_flags()
  ...
@@ -85,21 +85,6 @@ int sysctl_sched_rt_runtime = 950000;
/* CPUs with isolated domains */
cpumask_var_t cpu_isolated_map;

/*
 * this_rq_lock - lock this runqueue and disable interrupts.
 */
static struct rq *this_rq_lock(void)
        __acquires(rq->lock)
{
        struct rq *rq;

        local_irq_disable();
        rq = this_rq();
        raw_spin_lock(&rq->lock);

        return rq;
}

/*
 * __task_rq_lock - lock the rq @p resides on.
 */
@@ -233,8 +218,11 @@ void update_rq_clock(struct rq *rq)
|
||||
return;
|
||||
|
||||
#ifdef CONFIG_SCHED_DEBUG
|
||||
if (sched_feat(WARN_DOUBLE_CLOCK))
|
||||
SCHED_WARN_ON(rq->clock_update_flags & RQCF_UPDATED);
|
||||
rq->clock_update_flags |= RQCF_UPDATED;
|
||||
#endif
|
||||
|
||||
delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
|
||||
if (delta < 0)
|
||||
return;
|
||||
@@ -261,13 +249,14 @@ static void hrtick_clear(struct rq *rq)
|
||||
static enum hrtimer_restart hrtick(struct hrtimer *timer)
|
||||
{
|
||||
struct rq *rq = container_of(timer, struct rq, hrtick_timer);
|
||||
struct rq_flags rf;
|
||||
|
||||
WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
|
||||
|
||||
raw_spin_lock(&rq->lock);
|
||||
rq_lock(rq, &rf);
|
||||
update_rq_clock(rq);
|
||||
rq->curr->sched_class->task_tick(rq, rq->curr, 1);
|
||||
raw_spin_unlock(&rq->lock);
|
||||
rq_unlock(rq, &rf);
|
||||
|
||||
return HRTIMER_NORESTART;
|
||||
}
|
||||
@@ -287,11 +276,12 @@ static void __hrtick_restart(struct rq *rq)
|
||||
static void __hrtick_start(void *arg)
|
||||
{
|
||||
struct rq *rq = arg;
|
||||
struct rq_flags rf;
|
||||
|
||||
raw_spin_lock(&rq->lock);
|
||||
rq_lock(rq, &rf);
|
||||
__hrtick_restart(rq);
|
||||
rq->hrtick_csd_pending = 0;
|
||||
raw_spin_unlock(&rq->lock);
|
||||
rq_unlock(rq, &rf);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -762,17 +752,23 @@ static void set_load_weight(struct task_struct *p)
|
||||
|
||||
static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
|
||||
{
|
||||
update_rq_clock(rq);
|
||||
if (!(flags & ENQUEUE_NOCLOCK))
|
||||
update_rq_clock(rq);
|
||||
|
||||
if (!(flags & ENQUEUE_RESTORE))
|
||||
sched_info_queued(rq, p);
|
||||
|
||||
p->sched_class->enqueue_task(rq, p, flags);
|
||||
}
|
||||
|
||||
static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
|
||||
{
|
||||
update_rq_clock(rq);
|
||||
if (!(flags & DEQUEUE_NOCLOCK))
|
||||
update_rq_clock(rq);
|
||||
|
||||
if (!(flags & DEQUEUE_SAVE))
|
||||
sched_info_dequeued(rq, p);
|
||||
|
||||
p->sched_class->dequeue_task(rq, p, flags);
|
||||
}
|
||||
|
||||
@@ -946,18 +942,19 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
|
||||
*
|
||||
* Returns (locked) new rq. Old rq's lock is released.
|
||||
*/
|
||||
static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int new_cpu)
|
||||
static struct rq *move_queued_task(struct rq *rq, struct rq_flags *rf,
|
||||
struct task_struct *p, int new_cpu)
|
||||
{
|
||||
lockdep_assert_held(&rq->lock);
|
||||
|
||||
p->on_rq = TASK_ON_RQ_MIGRATING;
|
||||
dequeue_task(rq, p, 0);
|
||||
dequeue_task(rq, p, DEQUEUE_NOCLOCK);
|
||||
set_task_cpu(p, new_cpu);
|
||||
raw_spin_unlock(&rq->lock);
|
||||
rq_unlock(rq, rf);
|
||||
|
||||
rq = cpu_rq(new_cpu);
|
||||
|
||||
raw_spin_lock(&rq->lock);
|
||||
rq_lock(rq, rf);
|
||||
BUG_ON(task_cpu(p) != new_cpu);
|
||||
enqueue_task(rq, p, 0);
|
||||
p->on_rq = TASK_ON_RQ_QUEUED;
|
||||
@@ -980,7 +977,8 @@ struct migration_arg {
|
||||
* So we race with normal scheduler movements, but that's OK, as long
|
||||
* as the task is no longer on this CPU.
|
||||
*/
|
||||
static struct rq *__migrate_task(struct rq *rq, struct task_struct *p, int dest_cpu)
|
||||
static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf,
|
||||
struct task_struct *p, int dest_cpu)
|
||||
{
|
||||
if (unlikely(!cpu_active(dest_cpu)))
|
||||
return rq;
|
||||
@@ -989,7 +987,8 @@ static struct rq *__migrate_task(struct rq *rq, struct task_struct *p, int dest_
|
||||
if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
|
||||
return rq;
|
||||
|
||||
rq = move_queued_task(rq, p, dest_cpu);
|
||||
update_rq_clock(rq);
|
||||
rq = move_queued_task(rq, rf, p, dest_cpu);
|
||||
|
||||
return rq;
|
||||
}
|
||||
@@ -1004,6 +1003,7 @@ static int migration_cpu_stop(void *data)
|
||||
struct migration_arg *arg = data;
|
||||
struct task_struct *p = arg->task;
|
||||
struct rq *rq = this_rq();
|
||||
struct rq_flags rf;
|
||||
|
||||
/*
|
||||
* The original target CPU might have gone down and we might
|
||||
@@ -1018,7 +1018,7 @@ static int migration_cpu_stop(void *data)
|
||||
sched_ttwu_pending();
|
||||
|
||||
raw_spin_lock(&p->pi_lock);
|
||||
raw_spin_lock(&rq->lock);
|
||||
rq_lock(rq, &rf);
|
||||
/*
|
||||
* If task_rq(p) != rq, it cannot be migrated here, because we're
|
||||
* holding rq->lock, if p->on_rq == 0 it cannot get enqueued because
|
||||
@@ -1026,11 +1026,11 @@ static int migration_cpu_stop(void *data)
|
||||
*/
|
||||
if (task_rq(p) == rq) {
|
||||
if (task_on_rq_queued(p))
|
||||
rq = __migrate_task(rq, p, arg->dest_cpu);
|
||||
rq = __migrate_task(rq, &rf, p, arg->dest_cpu);
|
||||
else
|
||||
p->wake_cpu = arg->dest_cpu;
|
||||
}
|
||||
raw_spin_unlock(&rq->lock);
|
||||
rq_unlock(rq, &rf);
|
||||
raw_spin_unlock(&p->pi_lock);
|
||||
|
||||
local_irq_enable();
|
||||
@@ -1063,7 +1063,7 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
|
||||
* holding rq->lock.
|
||||
*/
|
||||
lockdep_assert_held(&rq->lock);
|
||||
dequeue_task(rq, p, DEQUEUE_SAVE);
|
||||
dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK);
|
||||
}
|
||||
if (running)
|
||||
put_prev_task(rq, p);
|
||||
@@ -1071,7 +1071,7 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
|
||||
p->sched_class->set_cpus_allowed(p, new_mask);
|
||||
|
||||
if (queued)
|
||||
enqueue_task(rq, p, ENQUEUE_RESTORE);
|
||||
enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
|
||||
if (running)
|
||||
set_curr_task(rq, p);
|
||||
}
|
||||
@@ -1150,9 +1150,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
|
||||
* OK, since we're going to drop the lock immediately
|
||||
* afterwards anyway.
|
||||
*/
|
||||
rq_unpin_lock(rq, &rf);
|
||||
rq = move_queued_task(rq, p, dest_cpu);
|
||||
rq_repin_lock(rq, &rf);
|
||||
rq = move_queued_task(rq, &rf, p, dest_cpu);
|
||||
}
|
||||
out:
|
||||
task_rq_unlock(rq, p, &rf);
|
||||
@@ -1217,16 +1215,24 @@ static void __migrate_swap_task(struct task_struct *p, int cpu)
|
||||
{
|
||||
if (task_on_rq_queued(p)) {
|
||||
struct rq *src_rq, *dst_rq;
|
||||
struct rq_flags srf, drf;
|
||||
|
||||
src_rq = task_rq(p);
|
||||
dst_rq = cpu_rq(cpu);
|
||||
|
||||
rq_pin_lock(src_rq, &srf);
|
||||
rq_pin_lock(dst_rq, &drf);
|
||||
|
||||
p->on_rq = TASK_ON_RQ_MIGRATING;
|
||||
deactivate_task(src_rq, p, 0);
|
||||
set_task_cpu(p, cpu);
|
||||
activate_task(dst_rq, p, 0);
|
||||
p->on_rq = TASK_ON_RQ_QUEUED;
|
||||
check_preempt_curr(dst_rq, p, 0);
|
||||
|
||||
rq_unpin_lock(dst_rq, &drf);
|
||||
rq_unpin_lock(src_rq, &srf);
|
||||
|
||||
} else {
|
||||
/*
|
||||
* Task isn't running anymore; make it appear like we migrated
|
||||
@@ -1680,7 +1686,7 @@ static void
|
||||
ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
|
||||
struct rq_flags *rf)
|
||||
{
|
||||
int en_flags = ENQUEUE_WAKEUP;
|
||||
int en_flags = ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK;
|
||||
|
||||
lockdep_assert_held(&rq->lock);
|
||||
|
||||
@@ -1726,14 +1732,13 @@ void sched_ttwu_pending(void)
|
||||
struct rq *rq = this_rq();
|
||||
struct llist_node *llist = llist_del_all(&rq->wake_list);
|
||||
struct task_struct *p;
|
||||
unsigned long flags;
|
||||
struct rq_flags rf;
|
||||
|
||||
if (!llist)
|
||||
return;
|
||||
|
||||
raw_spin_lock_irqsave(&rq->lock, flags);
|
||||
rq_pin_lock(rq, &rf);
|
||||
rq_lock_irqsave(rq, &rf);
|
||||
update_rq_clock(rq);
|
||||
|
||||
while (llist) {
|
||||
int wake_flags = 0;
|
||||
@@ -1747,8 +1752,7 @@ void sched_ttwu_pending(void)
|
||||
ttwu_do_activate(rq, p, wake_flags, &rf);
|
||||
}
|
||||
|
||||
rq_unpin_lock(rq, &rf);
|
||||
raw_spin_unlock_irqrestore(&rq->lock, flags);
|
||||
rq_unlock_irqrestore(rq, &rf);
|
||||
}
|
||||
|
||||
void scheduler_ipi(void)
|
||||
@@ -1806,7 +1810,7 @@ static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags)
|
||||
void wake_up_if_idle(int cpu)
|
||||
{
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
unsigned long flags;
|
||||
struct rq_flags rf;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
@@ -1816,11 +1820,11 @@ void wake_up_if_idle(int cpu)
|
||||
if (set_nr_if_polling(rq->idle)) {
|
||||
trace_sched_wake_idle_without_ipi(cpu);
|
||||
} else {
|
||||
raw_spin_lock_irqsave(&rq->lock, flags);
|
||||
rq_lock_irqsave(rq, &rf);
|
||||
if (is_idle_task(rq->curr))
|
||||
smp_send_reschedule(cpu);
|
||||
/* Else CPU is not idle, do nothing here: */
|
||||
raw_spin_unlock_irqrestore(&rq->lock, flags);
|
||||
rq_unlock_irqrestore(rq, &rf);
|
||||
}
|
||||
|
||||
out:
|
||||
@@ -1846,11 +1850,10 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
|
||||
}
|
||||
#endif
|
||||
|
||||
raw_spin_lock(&rq->lock);
|
||||
rq_pin_lock(rq, &rf);
|
||||
rq_lock(rq, &rf);
|
||||
update_rq_clock(rq);
|
||||
ttwu_do_activate(rq, p, wake_flags, &rf);
|
||||
rq_unpin_lock(rq, &rf);
|
||||
raw_spin_unlock(&rq->lock);
|
||||
rq_unlock(rq, &rf);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2097,11 +2100,9 @@ static void try_to_wake_up_local(struct task_struct *p, struct rq_flags *rf)
|
||||
* disabled avoiding further scheduler activity on it and we've
|
||||
* not yet picked a replacement task.
|
||||
*/
|
||||
rq_unpin_lock(rq, rf);
|
||||
raw_spin_unlock(&rq->lock);
|
||||
rq_unlock(rq, rf);
|
||||
raw_spin_lock(&p->pi_lock);
|
||||
raw_spin_lock(&rq->lock);
|
||||
rq_repin_lock(rq, rf);
|
||||
rq_relock(rq, rf);
|
||||
}
|
||||
|
||||
if (!(p->state & TASK_NORMAL))
|
||||
@@ -2114,7 +2115,7 @@ static void try_to_wake_up_local(struct task_struct *p, struct rq_flags *rf)
|
||||
delayacct_blkio_end();
|
||||
atomic_dec(&rq->nr_iowait);
|
||||
}
|
||||
ttwu_activate(rq, p, ENQUEUE_WAKEUP);
|
||||
ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK);
|
||||
}
|
||||
|
||||
ttwu_do_wakeup(rq, p, 0, rf);
|
||||
@@ -2555,7 +2556,7 @@ void wake_up_new_task(struct task_struct *p)
|
||||
update_rq_clock(rq);
|
||||
post_init_entity_util_avg(&p->se);
|
||||
|
||||
activate_task(rq, p, 0);
|
||||
activate_task(rq, p, ENQUEUE_NOCLOCK);
|
||||
p->on_rq = TASK_ON_RQ_QUEUED;
|
||||
trace_sched_wakeup_new(p);
|
||||
check_preempt_curr(rq, p, WF_FORK);
|
||||
@@ -3093,15 +3094,18 @@ void scheduler_tick(void)
|
||||
int cpu = smp_processor_id();
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
struct task_struct *curr = rq->curr;
|
||||
struct rq_flags rf;
|
||||
|
||||
sched_clock_tick();
|
||||
|
||||
raw_spin_lock(&rq->lock);
|
||||
rq_lock(rq, &rf);
|
||||
|
||||
update_rq_clock(rq);
|
||||
curr->sched_class->task_tick(rq, curr, 0);
|
||||
cpu_load_update_active(rq);
|
||||
calc_global_load_tick(rq);
|
||||
raw_spin_unlock(&rq->lock);
|
||||
|
||||
rq_unlock(rq, &rf);
|
||||
|
||||
perf_event_task_tick();
|
||||
|
||||
@@ -3386,18 +3390,18 @@ static void __sched notrace __schedule(bool preempt)
|
||||
* done by the caller to avoid the race with signal_wake_up().
|
||||
*/
|
||||
smp_mb__before_spinlock();
|
||||
raw_spin_lock(&rq->lock);
|
||||
rq_pin_lock(rq, &rf);
|
||||
rq_lock(rq, &rf);
|
||||
|
||||
/* Promote REQ to ACT */
|
||||
rq->clock_update_flags <<= 1;
|
||||
update_rq_clock(rq);
|
||||
|
||||
switch_count = &prev->nivcsw;
|
||||
if (!preempt && prev->state) {
|
||||
if (unlikely(signal_pending_state(prev->state, prev))) {
|
||||
prev->state = TASK_RUNNING;
|
||||
} else {
|
||||
deactivate_task(rq, prev, DEQUEUE_SLEEP);
|
||||
deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK);
|
||||
prev->on_rq = 0;
|
||||
|
||||
if (prev->in_iowait) {
|
||||
@@ -3421,9 +3425,6 @@ static void __sched notrace __schedule(bool preempt)
|
||||
switch_count = &prev->nvcsw;
|
||||
}
|
||||
|
||||
if (task_on_rq_queued(prev))
|
||||
update_rq_clock(rq);
|
||||
|
||||
next = pick_next_task(rq, prev, &rf);
|
||||
clear_tsk_need_resched(prev);
|
||||
clear_preempt_need_resched();
|
||||
@@ -3439,8 +3440,7 @@ static void __sched notrace __schedule(bool preempt)
|
||||
rq = context_switch(rq, prev, next, &rf);
|
||||
} else {
|
||||
rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
|
||||
rq_unpin_lock(rq, &rf);
|
||||
raw_spin_unlock_irq(&rq->lock);
|
||||
rq_unlock_irq(rq, &rf);
|
||||
}
|
||||
|
||||
balance_callback(rq);
|
||||
@@ -3684,7 +3684,8 @@ EXPORT_SYMBOL(default_wake_function);
|
||||
*/
|
||||
void rt_mutex_setprio(struct task_struct *p, int prio)
|
||||
{
|
||||
int oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE;
|
||||
int oldprio, queued, running, queue_flag =
|
||||
DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
|
||||
const struct sched_class *prev_class;
|
||||
struct rq_flags rf;
|
||||
struct rq *rq;
|
||||
@@ -3805,7 +3806,7 @@ void set_user_nice(struct task_struct *p, long nice)
|
||||
queued = task_on_rq_queued(p);
|
||||
running = task_current(rq, p);
|
||||
if (queued)
|
||||
dequeue_task(rq, p, DEQUEUE_SAVE);
|
||||
dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK);
|
||||
if (running)
|
||||
put_prev_task(rq, p);
|
||||
|
||||
@@ -3816,7 +3817,7 @@ void set_user_nice(struct task_struct *p, long nice)
|
||||
delta = p->prio - old_prio;
|
||||
|
||||
if (queued) {
|
||||
enqueue_task(rq, p, ENQUEUE_RESTORE);
|
||||
enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
|
||||
/*
|
||||
* If the task increased its priority or is running and
|
||||
* lowered its priority, then reschedule its CPU:
|
||||
@@ -4126,7 +4127,7 @@ static int __sched_setscheduler(struct task_struct *p,
|
||||
const struct sched_class *prev_class;
|
||||
struct rq_flags rf;
|
||||
int reset_on_fork;
|
||||
int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE;
|
||||
int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
|
||||
struct rq *rq;
|
||||
|
||||
/* May grab non-irq protected spin_locks: */
|
||||
@@ -4923,7 +4924,12 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
|
||||
*/
|
||||
SYSCALL_DEFINE0(sched_yield)
|
||||
{
|
||||
struct rq *rq = this_rq_lock();
|
||||
struct rq_flags rf;
|
||||
struct rq *rq;
|
||||
|
||||
local_irq_disable();
|
||||
rq = this_rq();
|
||||
rq_lock(rq, &rf);
|
||||
|
||||
schedstat_inc(rq->yld_count);
|
||||
current->sched_class->yield_task(rq);
|
||||
@@ -4932,9 +4938,8 @@ SYSCALL_DEFINE0(sched_yield)
|
||||
* Since we are going to call schedule() anyway, there's
|
||||
* no need to preempt or enable interrupts:
|
||||
*/
|
||||
__release(rq->lock);
|
||||
spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
|
||||
do_raw_spin_unlock(&rq->lock);
|
||||
preempt_disable();
|
||||
rq_unlock(rq, &rf);
|
||||
sched_preempt_enable_no_resched();
|
||||
|
||||
schedule();
|
||||
@@ -5514,7 +5519,7 @@ void sched_setnuma(struct task_struct *p, int nid)
|
||||
p->numa_preferred_nid = nid;
|
||||
|
||||
if (queued)
|
||||
enqueue_task(rq, p, ENQUEUE_RESTORE);
|
||||
enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
|
||||
if (running)
|
||||
set_curr_task(rq, p);
|
||||
task_rq_unlock(rq, p, &rf);
|
||||
@@ -5579,11 +5584,11 @@ static struct task_struct fake_task = {
|
||||
* there's no concurrency possible, we hold the required locks anyway
|
||||
* because of lock validation efforts.
|
||||
*/
|
||||
static void migrate_tasks(struct rq *dead_rq)
|
||||
static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
|
||||
{
|
||||
struct rq *rq = dead_rq;
|
||||
struct task_struct *next, *stop = rq->stop;
|
||||
struct rq_flags rf;
|
||||
struct rq_flags orf = *rf;
|
||||
int dest_cpu;
|
||||
|
||||
/*
|
||||
@@ -5602,9 +5607,7 @@ static void migrate_tasks(struct rq *dead_rq)
|
||||
* class method both need to have an up-to-date
|
||||
* value of rq->clock[_task]
|
||||
*/
|
||||
rq_pin_lock(rq, &rf);
|
||||
update_rq_clock(rq);
|
||||
rq_unpin_lock(rq, &rf);
|
||||
|
||||
for (;;) {
|
||||
/*
|
||||
@@ -5617,8 +5620,7 @@ static void migrate_tasks(struct rq *dead_rq)
|
||||
/*
|
||||
* pick_next_task() assumes pinned rq->lock:
|
||||
*/
|
||||
rq_repin_lock(rq, &rf);
|
||||
next = pick_next_task(rq, &fake_task, &rf);
|
||||
next = pick_next_task(rq, &fake_task, rf);
|
||||
BUG_ON(!next);
|
||||
next->sched_class->put_prev_task(rq, next);
|
||||
|
||||
@@ -5631,10 +5633,9 @@ static void migrate_tasks(struct rq *dead_rq)
|
||||
* because !cpu_active at this point, which means load-balance
|
||||
* will not interfere. Also, stop-machine.
|
||||
*/
|
||||
rq_unpin_lock(rq, &rf);
|
||||
raw_spin_unlock(&rq->lock);
|
||||
rq_unlock(rq, rf);
|
||||
raw_spin_lock(&next->pi_lock);
|
||||
raw_spin_lock(&rq->lock);
|
||||
rq_relock(rq, rf);
|
||||
|
||||
/*
|
||||
* Since we're inside stop-machine, _nothing_ should have
|
||||
@@ -5648,12 +5649,12 @@ static void migrate_tasks(struct rq *dead_rq)
|
||||
|
||||
/* Find suitable destination for @next, with force if needed. */
|
||||
dest_cpu = select_fallback_rq(dead_rq->cpu, next);
|
||||
|
||||
rq = __migrate_task(rq, next, dest_cpu);
|
||||
rq = __migrate_task(rq, rf, next, dest_cpu);
|
||||
if (rq != dead_rq) {
|
||||
raw_spin_unlock(&rq->lock);
|
||||
rq_unlock(rq, rf);
|
||||
rq = dead_rq;
|
||||
raw_spin_lock(&rq->lock);
|
||||
*rf = orf;
|
||||
rq_relock(rq, rf);
|
||||
}
|
||||
raw_spin_unlock(&next->pi_lock);
|
||||
}
|
||||
@@ -5766,7 +5767,7 @@ static int cpuset_cpu_inactive(unsigned int cpu)
|
||||
int sched_cpu_activate(unsigned int cpu)
|
||||
{
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
unsigned long flags;
|
||||
struct rq_flags rf;
|
||||
|
||||
set_cpu_active(cpu, true);
|
||||
|
||||
@@ -5784,12 +5785,12 @@ int sched_cpu_activate(unsigned int cpu)
|
||||
* 2) At runtime, if cpuset_cpu_active() fails to rebuild the
|
||||
* domains.
|
||||
*/
|
||||
raw_spin_lock_irqsave(&rq->lock, flags);
|
||||
rq_lock_irqsave(rq, &rf);
|
||||
if (rq->rd) {
|
||||
BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
|
||||
set_rq_online(rq);
|
||||
}
|
||||
raw_spin_unlock_irqrestore(&rq->lock, flags);
|
||||
rq_unlock_irqrestore(rq, &rf);
|
||||
|
||||
update_max_interval();
|
||||
|
||||
@@ -5847,18 +5848,20 @@ int sched_cpu_starting(unsigned int cpu)
|
||||
int sched_cpu_dying(unsigned int cpu)
|
||||
{
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
unsigned long flags;
|
||||
struct rq_flags rf;
|
||||
|
||||
/* Handle pending wakeups and then migrate everything off */
|
||||
sched_ttwu_pending();
|
||||
raw_spin_lock_irqsave(&rq->lock, flags);
|
||||
|
||||
rq_lock_irqsave(rq, &rf);
|
||||
if (rq->rd) {
|
||||
BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
|
||||
set_rq_offline(rq);
|
||||
}
|
||||
migrate_tasks(rq);
|
||||
migrate_tasks(rq, &rf);
|
||||
BUG_ON(rq->nr_running != 1);
|
||||
raw_spin_unlock_irqrestore(&rq->lock, flags);
|
||||
rq_unlock_irqrestore(rq, &rf);
|
||||
|
||||
calc_load_migrate(rq);
|
||||
update_max_interval();
|
||||
nohz_balance_exit_idle(cpu);
|
||||
@@ -6412,7 +6415,8 @@ static void sched_change_group(struct task_struct *tsk, int type)
|
||||
*/
|
||||
void sched_move_task(struct task_struct *tsk)
|
||||
{
|
||||
int queued, running;
|
||||
int queued, running, queue_flags =
|
||||
DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
|
||||
struct rq_flags rf;
|
||||
struct rq *rq;
|
||||
|
||||
@@ -6423,14 +6427,14 @@ void sched_move_task(struct task_struct *tsk)
|
||||
queued = task_on_rq_queued(tsk);
|
||||
|
||||
if (queued)
|
||||
dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE);
|
||||
dequeue_task(rq, tsk, queue_flags);
|
||||
if (running)
|
||||
put_prev_task(rq, tsk);
|
||||
|
||||
sched_change_group(tsk, TASK_MOVE_GROUP);
|
||||
|
||||
if (queued)
|
||||
enqueue_task(rq, tsk, ENQUEUE_RESTORE | ENQUEUE_MOVE);
|
||||
enqueue_task(rq, tsk, queue_flags);
|
||||
if (running)
|
||||
set_curr_task(rq, tsk);
|
||||
|
||||
@@ -7008,14 +7012,15 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
|
||||
for_each_online_cpu(i) {
|
||||
struct cfs_rq *cfs_rq = tg->cfs_rq[i];
|
||||
struct rq *rq = cfs_rq->rq;
|
||||
struct rq_flags rf;
|
||||
|
||||
raw_spin_lock_irq(&rq->lock);
|
||||
rq_lock_irq(rq, &rf);
|
||||
cfs_rq->runtime_enabled = runtime_enabled;
|
||||
cfs_rq->runtime_remaining = 0;
|
||||
|
||||
if (cfs_rq->throttled)
|
||||
unthrottle_cfs_rq(cfs_rq);
|
||||
raw_spin_unlock_irq(&rq->lock);
|
||||
rq_unlock_irq(rq, &rf);
|
||||
}
|
||||
if (runtime_was_enabled && !runtime_enabled)
|
||||
cfs_bandwidth_usage_dec();
|
||||
|
@@ -717,18 +717,12 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
#include "sched-pelt.h"
|
||||
|
||||
static int select_idle_sibling(struct task_struct *p, int prev_cpu, int cpu);
|
||||
static unsigned long task_h_load(struct task_struct *p);
|
||||
|
||||
/*
|
||||
* We choose a half-life close to 1 scheduling period.
|
||||
* Note: The tables runnable_avg_yN_inv and runnable_avg_yN_sum are
|
||||
* dependent on this value.
|
||||
*/
|
||||
#define LOAD_AVG_PERIOD 32
|
||||
#define LOAD_AVG_MAX 47742 /* maximum possible load avg */
|
||||
#define LOAD_AVG_MAX_N 345 /* number of full periods to produce LOAD_AVG_MAX */
|
||||
|
||||
/* Give new sched_entity start runnable values to heavy its load in infant time */
|
||||
void init_entity_runnable_average(struct sched_entity *se)
|
||||
{
|
||||
@@ -2733,47 +2727,15 @@ static inline void update_cfs_shares(struct sched_entity *se)
|
||||
#endif /* CONFIG_FAIR_GROUP_SCHED */
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/* Precomputed fixed inverse multiplies for multiplication by y^n */
|
||||
static const u32 runnable_avg_yN_inv[] = {
|
||||
0xffffffff, 0xfa83b2da, 0xf5257d14, 0xefe4b99a, 0xeac0c6e6, 0xe5b906e6,
|
||||
0xe0ccdeeb, 0xdbfbb796, 0xd744fcc9, 0xd2a81d91, 0xce248c14, 0xc9b9bd85,
|
||||
0xc5672a10, 0xc12c4cc9, 0xbd08a39e, 0xb8fbaf46, 0xb504f333, 0xb123f581,
|
||||
0xad583ee9, 0xa9a15ab4, 0xa5fed6a9, 0xa2704302, 0x9ef5325f, 0x9b8d39b9,
|
||||
0x9837f050, 0x94f4efa8, 0x91c3d373, 0x8ea4398a, 0x8b95c1e3, 0x88980e80,
|
||||
0x85aac367, 0x82cd8698,
|
||||
};
|
||||
|
||||
/*
|
||||
* Precomputed \Sum y^k { 1<=k<=n }. These are floor(true_value) to prevent
|
||||
* over-estimates when re-combining.
|
||||
*/
|
||||
static const u32 runnable_avg_yN_sum[] = {
|
||||
0, 1002, 1982, 2941, 3880, 4798, 5697, 6576, 7437, 8279, 9103,
|
||||
9909,10698,11470,12226,12966,13690,14398,15091,15769,16433,17082,
|
||||
17718,18340,18949,19545,20128,20698,21256,21802,22336,22859,23371,
|
||||
};
|
||||
|
||||
/*
|
||||
* Precomputed \Sum y^k { 1<=k<=n, where n%32=0). Values are rolled down to
|
||||
* lower integers. See Documentation/scheduler/sched-avg.txt how these
|
||||
* were generated:
|
||||
*/
|
||||
static const u32 __accumulated_sum_N32[] = {
|
||||
0, 23371, 35056, 40899, 43820, 45281,
|
||||
46011, 46376, 46559, 46650, 46696, 46719,
|
||||
};
|
||||
|
||||
/*
|
||||
* Approximate:
|
||||
* val * y^n, where y^32 ~= 0.5 (~1 scheduling period)
|
||||
*/
|
||||
static __always_inline u64 decay_load(u64 val, u64 n)
|
||||
static u64 decay_load(u64 val, u64 n)
|
||||
{
|
||||
unsigned int local_n;
|
||||
|
||||
if (!n)
|
||||
return val;
|
||||
else if (unlikely(n > LOAD_AVG_PERIOD * 63))
|
||||
if (unlikely(n > LOAD_AVG_PERIOD * 63))
|
||||
return 0;
|
||||
|
||||
/* after bounds checking we can collapse to 32-bit */
|
||||
@@ -2795,31 +2757,98 @@ static __always_inline u64 decay_load(u64 val, u64 n)
|
||||
return val;
|
||||
}
|
||||
|
||||
/*
|
||||
* For updates fully spanning n periods, the contribution to runnable
|
||||
* average will be: \Sum 1024*y^n
|
||||
*
|
||||
* We can compute this reasonably efficiently by combining:
|
||||
* y^PERIOD = 1/2 with precomputed \Sum 1024*y^n {for n <PERIOD}
|
||||
*/
|
||||
static u32 __compute_runnable_contrib(u64 n)
|
||||
static u32 __accumulate_pelt_segments(u64 periods, u32 d1, u32 d3)
|
||||
{
|
||||
u32 contrib = 0;
|
||||
u32 c1, c2, c3 = d3; /* y^0 == 1 */
|
||||
|
||||
if (likely(n <= LOAD_AVG_PERIOD))
|
||||
return runnable_avg_yN_sum[n];
|
||||
else if (unlikely(n >= LOAD_AVG_MAX_N))
|
||||
return LOAD_AVG_MAX;
|
||||
/*
|
||||
* c1 = d1 y^p
|
||||
*/
|
||||
c1 = decay_load((u64)d1, periods);
|
||||
|
||||
/* Since n < LOAD_AVG_MAX_N, n/LOAD_AVG_PERIOD < 11 */
|
||||
contrib = __accumulated_sum_N32[n/LOAD_AVG_PERIOD];
|
||||
n %= LOAD_AVG_PERIOD;
|
||||
contrib = decay_load(contrib, n);
|
||||
return contrib + runnable_avg_yN_sum[n];
|
||||
/*
|
||||
* p-1
|
||||
* c2 = 1024 \Sum y^n
|
||||
* n=1
|
||||
*
|
||||
* inf inf
|
||||
* = 1024 ( \Sum y^n - \Sum y^n - y^0 )
|
||||
* n=0 n=p
|
||||
*/
|
||||
c2 = LOAD_AVG_MAX - decay_load(LOAD_AVG_MAX, periods) - 1024;
|
||||
|
||||
return c1 + c2 + c3;
|
||||
}
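Spelled out, the c2 identity used just above is the standard geometric-series manipulation (a restatement of the in-code comment; LOAD_AVG_MAX is treated here as an approximation of 1024/(1 - y)):

c_2 = 1024 \sum_{n=1}^{p-1} y^n
    = 1024 \Big( \sum_{n=0}^{\infty} y^n - \sum_{n=p}^{\infty} y^n - y^0 \Big)
    \approx \mathrm{LOAD\_AVG\_MAX} - \mathrm{decay\_load}(\mathrm{LOAD\_AVG\_MAX},\, p) - 1024

since 1024 * \sum_{n>=0} y^n = 1024/(1 - y) ~ LOAD_AVG_MAX and decay_load(v, p) = v * y^p, so the second infinite sum is just LOAD_AVG_MAX decayed by p periods.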
|
||||
|
||||
#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
|
||||
|
||||
/*
|
||||
* Accumulate the three separate parts of the sum; d1 the remainder
|
||||
* of the last (incomplete) period, d2 the span of full periods and d3
|
||||
* the remainder of the (incomplete) current period.
|
||||
*
|
||||
* d1 d2 d3
|
||||
* ^ ^ ^
|
||||
* | | |
|
||||
* |<->|<----------------->|<--->|
|
||||
* ... |---x---|------| ... |------|-----x (now)
|
||||
*
|
||||
* p-1
|
||||
* u' = (u + d1) y^p + 1024 \Sum y^n + d3 y^0
|
||||
* n=1
|
||||
*
|
||||
* = u y^p + (Step 1)
|
||||
*
|
||||
* p-1
|
||||
* d1 y^p + 1024 \Sum y^n + d3 y^0 (Step 2)
|
||||
* n=1
|
||||
*/
|
||||
static __always_inline u32
|
||||
accumulate_sum(u64 delta, int cpu, struct sched_avg *sa,
|
||||
unsigned long weight, int running, struct cfs_rq *cfs_rq)
|
||||
{
|
||||
unsigned long scale_freq, scale_cpu;
|
||||
u32 contrib = (u32)delta; /* p == 0 -> delta < 1024 */
|
||||
u64 periods;
|
||||
|
||||
scale_freq = arch_scale_freq_capacity(NULL, cpu);
|
||||
scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
|
||||
|
||||
delta += sa->period_contrib;
|
||||
periods = delta / 1024; /* A period is 1024us (~1ms) */
|
||||
|
||||
/*
|
||||
* Step 1: decay old *_sum if we crossed period boundaries.
|
||||
*/
|
||||
if (periods) {
|
||||
sa->load_sum = decay_load(sa->load_sum, periods);
|
||||
if (cfs_rq) {
|
||||
cfs_rq->runnable_load_sum =
|
||||
decay_load(cfs_rq->runnable_load_sum, periods);
|
||||
}
|
||||
sa->util_sum = decay_load((u64)(sa->util_sum), periods);
|
||||
|
||||
/*
|
||||
* Step 2
|
||||
*/
|
||||
delta %= 1024;
|
||||
contrib = __accumulate_pelt_segments(periods,
|
||||
1024 - sa->period_contrib, delta);
|
||||
}
|
||||
sa->period_contrib = delta;
|
||||
|
||||
contrib = cap_scale(contrib, scale_freq);
|
||||
if (weight) {
|
||||
sa->load_sum += weight * contrib;
|
||||
if (cfs_rq)
|
||||
cfs_rq->runnable_load_sum += weight * contrib;
|
||||
}
|
||||
if (running)
|
||||
sa->util_sum += contrib * scale_cpu;
|
||||
|
||||
return periods;
|
||||
}
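A worked instance of the two steps above, with made-up numbers (sa->period_contrib = 200 and delta = 3000, both in the ~1us units used here):

        delta   = 3000 + 200 = 3200,   periods p = 3200 / 1024 = 3
        d1      = 1024 - 200 = 824,    d3 = 3200 % 1024 = 128
        contrib ~= 824*y^3 + 1024*(y + y^2) + 128 ~= 772 + 1980 + 128 ~= 2880,   with y = 2^(-1/32)

The old *_sum values are first decayed by y^3, period_contrib is left at 128 for the next update, and the exact numbers differ slightly from the ideal ones because decay_load() and LOAD_AVG_MAX use fixed-point arithmetic.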
|
||||
|
||||
/*
|
||||
* We can represent the historical contribution to runnable average as the
|
||||
* coefficients of a geometric series. To do this we sub-divide our runnable
|
||||
@@ -2849,13 +2878,10 @@ static u32 __compute_runnable_contrib(u64 n)
|
||||
* = u_0 + u_1*y + u_2*y^2 + ... [re-labeling u_i --> u_{i+1}]
|
||||
*/
|
||||
static __always_inline int
|
||||
__update_load_avg(u64 now, int cpu, struct sched_avg *sa,
|
||||
___update_load_avg(u64 now, int cpu, struct sched_avg *sa,
|
||||
unsigned long weight, int running, struct cfs_rq *cfs_rq)
|
||||
{
|
||||
u64 delta, scaled_delta, periods;
|
||||
u32 contrib;
|
||||
unsigned int delta_w, scaled_delta_w, decayed = 0;
|
||||
unsigned long scale_freq, scale_cpu;
|
||||
u64 delta;
|
||||
|
||||
delta = now - sa->last_update_time;
|
||||
/*
|
||||
@@ -2874,83 +2900,52 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa,
|
||||
delta >>= 10;
|
||||
if (!delta)
|
||||
return 0;
|
||||
sa->last_update_time = now;
|
||||
|
||||
scale_freq = arch_scale_freq_capacity(NULL, cpu);
|
||||
scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
|
||||
sa->last_update_time += delta << 10;
|
||||
|
||||
/* delta_w is the amount already accumulated against our next period */
|
||||
delta_w = sa->period_contrib;
|
||||
if (delta + delta_w >= 1024) {
|
||||
decayed = 1;
|
||||
/*
|
||||
* Now we know we crossed measurement unit boundaries. The *_avg
|
||||
* accrues by two steps:
|
||||
*
|
||||
* Step 1: accumulate *_sum since last_update_time. If we haven't
|
||||
* crossed period boundaries, finish.
|
||||
*/
|
||||
if (!accumulate_sum(delta, cpu, sa, weight, running, cfs_rq))
|
||||
return 0;
|
||||
|
||||
/* how much left for next period will start over, we don't know yet */
|
||||
sa->period_contrib = 0;
|
||||
|
||||
/*
|
||||
* Now that we know we're crossing a period boundary, figure
|
||||
* out how much from delta we need to complete the current
|
||||
* period and accrue it.
|
||||
*/
|
||||
delta_w = 1024 - delta_w;
|
||||
scaled_delta_w = cap_scale(delta_w, scale_freq);
|
||||
if (weight) {
|
||||
sa->load_sum += weight * scaled_delta_w;
|
||||
if (cfs_rq) {
|
||||
cfs_rq->runnable_load_sum +=
|
||||
weight * scaled_delta_w;
|
||||
}
|
||||
}
|
||||
if (running)
|
||||
sa->util_sum += scaled_delta_w * scale_cpu;
|
||||
|
||||
delta -= delta_w;
|
||||
|
||||
/* Figure out how many additional periods this update spans */
|
||||
periods = delta / 1024;
|
||||
delta %= 1024;
|
||||
|
||||
sa->load_sum = decay_load(sa->load_sum, periods + 1);
|
||||
if (cfs_rq) {
|
||||
cfs_rq->runnable_load_sum =
|
||||
decay_load(cfs_rq->runnable_load_sum, periods + 1);
|
||||
}
|
||||
sa->util_sum = decay_load((u64)(sa->util_sum), periods + 1);
|
||||
|
||||
/* Efficiently calculate \sum (1..n_period) 1024*y^i */
|
||||
contrib = __compute_runnable_contrib(periods);
|
||||
contrib = cap_scale(contrib, scale_freq);
|
||||
if (weight) {
|
||||
sa->load_sum += weight * contrib;
|
||||
if (cfs_rq)
|
||||
cfs_rq->runnable_load_sum += weight * contrib;
|
||||
}
|
||||
if (running)
|
||||
sa->util_sum += contrib * scale_cpu;
|
||||
/*
|
||||
* Step 2: update *_avg.
|
||||
*/
|
||||
sa->load_avg = div_u64(sa->load_sum, LOAD_AVG_MAX);
|
||||
if (cfs_rq) {
|
||||
cfs_rq->runnable_load_avg =
|
||||
div_u64(cfs_rq->runnable_load_sum, LOAD_AVG_MAX);
|
||||
}
|
||||
sa->util_avg = sa->util_sum / LOAD_AVG_MAX;
|
||||
|
||||
/* Remainder of delta accrued against u_0` */
|
||||
scaled_delta = cap_scale(delta, scale_freq);
|
||||
if (weight) {
|
||||
sa->load_sum += weight * scaled_delta;
|
||||
if (cfs_rq)
|
||||
cfs_rq->runnable_load_sum += weight * scaled_delta;
|
||||
}
|
||||
if (running)
|
||||
sa->util_sum += scaled_delta * scale_cpu;
|
||||
return 1;
|
||||
}
|
||||
|
||||
sa->period_contrib += delta;
|
||||
static int
|
||||
__update_load_avg_blocked_se(u64 now, int cpu, struct sched_entity *se)
|
||||
{
|
||||
return ___update_load_avg(now, cpu, &se->avg, 0, 0, NULL);
|
||||
}
|
||||
|
||||
if (decayed) {
|
||||
sa->load_avg = div_u64(sa->load_sum, LOAD_AVG_MAX);
|
||||
if (cfs_rq) {
|
||||
cfs_rq->runnable_load_avg =
|
||||
div_u64(cfs_rq->runnable_load_sum, LOAD_AVG_MAX);
|
||||
}
|
||||
sa->util_avg = sa->util_sum / LOAD_AVG_MAX;
|
||||
}
|
||||
static int
|
||||
__update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
{
|
||||
return ___update_load_avg(now, cpu, &se->avg,
|
||||
se->on_rq * scale_load_down(se->load.weight),
|
||||
cfs_rq->curr == se, NULL);
|
||||
}
|
||||
|
||||
return decayed;
|
||||
static int
|
||||
__update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq)
|
||||
{
|
||||
return ___update_load_avg(now, cpu, &cfs_rq->avg,
|
||||
scale_load_down(cfs_rq->load.weight),
|
||||
cfs_rq->curr != NULL, cfs_rq);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -3014,6 +3009,9 @@ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force)
|
||||
void set_task_rq_fair(struct sched_entity *se,
|
||||
struct cfs_rq *prev, struct cfs_rq *next)
|
||||
{
|
||||
u64 p_last_update_time;
|
||||
u64 n_last_update_time;
|
||||
|
||||
if (!sched_feat(ATTACH_AGE_LOAD))
|
||||
return;
|
||||
|
||||
@@ -3024,11 +3022,11 @@ void set_task_rq_fair(struct sched_entity *se,
|
||||
* time. This will result in the wakee task is less decayed, but giving
|
||||
* the wakee more load sounds not bad.
|
||||
*/
|
||||
if (se->avg.last_update_time && prev) {
|
||||
u64 p_last_update_time;
|
||||
u64 n_last_update_time;
|
||||
if (!(se->avg.last_update_time && prev))
|
||||
return;
|
||||
|
||||
#ifndef CONFIG_64BIT
|
||||
{
|
||||
u64 p_last_update_time_copy;
|
||||
u64 n_last_update_time_copy;
|
||||
|
||||
@@ -3043,14 +3041,13 @@ void set_task_rq_fair(struct sched_entity *se,
|
||||
|
||||
} while (p_last_update_time != p_last_update_time_copy ||
|
||||
n_last_update_time != n_last_update_time_copy);
|
||||
#else
|
||||
p_last_update_time = prev->avg.last_update_time;
|
||||
n_last_update_time = next->avg.last_update_time;
|
||||
#endif
|
||||
__update_load_avg(p_last_update_time, cpu_of(rq_of(prev)),
|
||||
&se->avg, 0, 0, NULL);
|
||||
se->avg.last_update_time = n_last_update_time;
|
||||
}
|
||||
#else
|
||||
p_last_update_time = prev->avg.last_update_time;
|
||||
n_last_update_time = next->avg.last_update_time;
|
||||
#endif
|
||||
__update_load_avg_blocked_se(p_last_update_time, cpu_of(rq_of(prev)), se);
|
||||
se->avg.last_update_time = n_last_update_time;
|
||||
}
|
||||
|
||||
/* Take into account change of utilization of a child task group */
|
||||
@@ -3173,6 +3170,36 @@ static inline int propagate_entity_load_avg(struct sched_entity *se)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if we need to update the load and the utilization of a blocked
|
||||
* group_entity:
|
||||
*/
|
||||
static inline bool skip_blocked_update(struct sched_entity *se)
|
||||
{
|
||||
struct cfs_rq *gcfs_rq = group_cfs_rq(se);
|
||||
|
||||
/*
|
||||
* If sched_entity still have not zero load or utilization, we have to
|
||||
* decay it:
|
||||
*/
|
||||
if (se->avg.load_avg || se->avg.util_avg)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* If there is a pending propagation, we have to update the load and
|
||||
* the utilization of the sched_entity:
|
||||
*/
|
||||
if (gcfs_rq->propagate_avg)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Otherwise, the load and the utilization of the sched_entity is
|
||||
* already zero and there is no pending propagation, so it will be a
|
||||
* waste of time to try to decay it:
|
||||
*/
|
||||
return true;
|
||||
}
|
||||
|
||||
#else /* CONFIG_FAIR_GROUP_SCHED */
|
||||
|
||||
static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}
|
||||
@@ -3265,8 +3292,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
|
||||
set_tg_cfs_propagate(cfs_rq);
|
||||
}
|
||||
|
||||
decayed = __update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa,
|
||||
scale_load_down(cfs_rq->load.weight), cfs_rq->curr != NULL, cfs_rq);
|
||||
decayed = __update_load_avg_cfs_rq(now, cpu_of(rq_of(cfs_rq)), cfs_rq);
|
||||
|
||||
#ifndef CONFIG_64BIT
|
||||
smp_wmb();
|
||||
@@ -3298,11 +3324,8 @@ static inline void update_load_avg(struct sched_entity *se, int flags)
|
||||
* Track task load average for carrying it to new CPU after migrated, and
|
||||
* track group sched_entity load average for task_h_load calc in migration
|
||||
*/
|
||||
if (se->avg.last_update_time && !(flags & SKIP_AGE_LOAD)) {
|
||||
__update_load_avg(now, cpu, &se->avg,
|
||||
se->on_rq * scale_load_down(se->load.weight),
|
||||
cfs_rq->curr == se, NULL);
|
||||
}
|
||||
if (se->avg.last_update_time && !(flags & SKIP_AGE_LOAD))
|
||||
__update_load_avg_se(now, cpu, cfs_rq, se);
|
||||
|
||||
decayed = update_cfs_rq_load_avg(now, cfs_rq, true);
|
||||
decayed |= propagate_entity_load_avg(se);
|
||||
@@ -3407,7 +3430,7 @@ void sync_entity_load_avg(struct sched_entity *se)
|
||||
u64 last_update_time;
|
||||
|
||||
last_update_time = cfs_rq_last_update_time(cfs_rq);
|
||||
__update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)), &se->avg, 0, 0, NULL);
|
||||
__update_load_avg_blocked_se(last_update_time, cpu_of(rq_of(cfs_rq)), se);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -4271,8 +4294,9 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b,
|
||||
list_for_each_entry_rcu(cfs_rq, &cfs_b->throttled_cfs_rq,
|
||||
throttled_list) {
|
||||
struct rq *rq = rq_of(cfs_rq);
|
||||
struct rq_flags rf;
|
||||
|
||||
raw_spin_lock(&rq->lock);
|
||||
rq_lock(rq, &rf);
|
||||
if (!cfs_rq_throttled(cfs_rq))
|
||||
goto next;
|
||||
|
||||
@@ -4289,7 +4313,7 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b,
|
||||
unthrottle_cfs_rq(cfs_rq);
|
||||
|
||||
next:
|
||||
raw_spin_unlock(&rq->lock);
|
||||
rq_unlock(rq, &rf);
|
||||
|
||||
if (!remaining)
|
||||
break;
|
||||
@@ -5097,15 +5121,16 @@ void cpu_load_update_nohz_stop(void)
|
||||
unsigned long curr_jiffies = READ_ONCE(jiffies);
|
||||
struct rq *this_rq = this_rq();
|
||||
unsigned long load;
|
||||
struct rq_flags rf;
|
||||
|
||||
if (curr_jiffies == this_rq->last_load_update_tick)
|
||||
return;
|
||||
|
||||
load = weighted_cpuload(cpu_of(this_rq));
|
||||
raw_spin_lock(&this_rq->lock);
|
||||
rq_lock(this_rq, &rf);
|
||||
update_rq_clock(this_rq);
|
||||
cpu_load_update_nohz(this_rq, curr_jiffies, load);
|
||||
raw_spin_unlock(&this_rq->lock);
|
||||
rq_unlock(this_rq, &rf);
|
||||
}
|
||||
#else /* !CONFIG_NO_HZ_COMMON */
|
||||
static inline void cpu_load_update_nohz(struct rq *this_rq,
|
||||
@@ -6769,7 +6794,7 @@ static void detach_task(struct task_struct *p, struct lb_env *env)
|
||||
lockdep_assert_held(&env->src_rq->lock);
|
||||
|
||||
p->on_rq = TASK_ON_RQ_MIGRATING;
|
||||
deactivate_task(env->src_rq, p, 0);
|
||||
deactivate_task(env->src_rq, p, DEQUEUE_NOCLOCK);
|
||||
set_task_cpu(p, env->dst_cpu);
|
||||
}
|
||||
|
||||
@@ -6902,7 +6927,7 @@ static void attach_task(struct rq *rq, struct task_struct *p)
|
||||
lockdep_assert_held(&rq->lock);
|
||||
|
||||
BUG_ON(task_rq(p) != rq);
|
||||
activate_task(rq, p, 0);
|
||||
activate_task(rq, p, ENQUEUE_NOCLOCK);
|
||||
p->on_rq = TASK_ON_RQ_QUEUED;
|
||||
check_preempt_curr(rq, p, 0);
|
||||
}
|
||||
@@ -6913,9 +6938,12 @@ static void attach_task(struct rq *rq, struct task_struct *p)
|
||||
*/
|
||||
static void attach_one_task(struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
raw_spin_lock(&rq->lock);
|
||||
struct rq_flags rf;
|
||||
|
||||
rq_lock(rq, &rf);
|
||||
update_rq_clock(rq);
|
||||
attach_task(rq, p);
|
||||
raw_spin_unlock(&rq->lock);
|
||||
rq_unlock(rq, &rf);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -6926,8 +6954,10 @@ static void attach_tasks(struct lb_env *env)
|
||||
{
|
||||
struct list_head *tasks = &env->tasks;
|
||||
struct task_struct *p;
|
||||
struct rq_flags rf;
|
||||
|
||||
raw_spin_lock(&env->dst_rq->lock);
|
||||
rq_lock(env->dst_rq, &rf);
|
||||
update_rq_clock(env->dst_rq);
|
||||
|
||||
while (!list_empty(tasks)) {
|
||||
p = list_first_entry(tasks, struct task_struct, se.group_node);
|
||||
@@ -6936,7 +6966,7 @@ static void attach_tasks(struct lb_env *env)
|
||||
attach_task(env->dst_rq, p);
|
||||
}
|
||||
|
||||
raw_spin_unlock(&env->dst_rq->lock);
|
||||
rq_unlock(env->dst_rq, &rf);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
@@ -6944,9 +6974,9 @@ static void update_blocked_averages(int cpu)
|
||||
{
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
struct cfs_rq *cfs_rq;
|
||||
unsigned long flags;
|
||||
struct rq_flags rf;
|
||||
|
||||
raw_spin_lock_irqsave(&rq->lock, flags);
|
||||
rq_lock_irqsave(rq, &rf);
|
||||
update_rq_clock(rq);
|
||||
|
||||
/*
|
||||
@@ -6954,6 +6984,8 @@ static void update_blocked_averages(int cpu)
|
||||
* list_add_leaf_cfs_rq() for details.
|
||||
*/
|
||||
for_each_leaf_cfs_rq(rq, cfs_rq) {
|
||||
struct sched_entity *se;
|
||||
|
||||
/* throttled entities do not contribute to load */
|
||||
if (throttled_hierarchy(cfs_rq))
|
||||
continue;
|
||||
@@ -6961,11 +6993,12 @@ static void update_blocked_averages(int cpu)
|
||||
if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq, true))
|
||||
update_tg_load_avg(cfs_rq, 0);
|
||||
|
||||
/* Propagate pending load changes to the parent */
|
||||
if (cfs_rq->tg->se[cpu])
|
||||
update_load_avg(cfs_rq->tg->se[cpu], 0);
|
||||
/* Propagate pending load changes to the parent, if any: */
|
||||
se = cfs_rq->tg->se[cpu];
|
||||
if (se && !skip_blocked_update(se))
|
||||
update_load_avg(se, 0);
|
||||
}
|
||||
raw_spin_unlock_irqrestore(&rq->lock, flags);
|
||||
rq_unlock_irqrestore(rq, &rf);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -7019,12 +7052,12 @@ static inline void update_blocked_averages(int cpu)
|
||||
{
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
struct cfs_rq *cfs_rq = &rq->cfs;
|
||||
unsigned long flags;
|
||||
struct rq_flags rf;
|
||||
|
||||
raw_spin_lock_irqsave(&rq->lock, flags);
|
||||
rq_lock_irqsave(rq, &rf);
|
||||
update_rq_clock(rq);
|
||||
update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq, true);
|
||||
raw_spin_unlock_irqrestore(&rq->lock, flags);
|
||||
rq_unlock_irqrestore(rq, &rf);
|
||||
}
|
||||
|
||||
static unsigned long task_h_load(struct task_struct *p)
|
||||
@@ -7525,6 +7558,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
|
||||
{
|
||||
struct sched_domain *child = env->sd->child;
|
||||
struct sched_group *sg = env->sd->groups;
|
||||
struct sg_lb_stats *local = &sds->local_stat;
|
||||
struct sg_lb_stats tmp_sgs;
|
||||
int load_idx, prefer_sibling = 0;
|
||||
bool overload = false;
|
||||
@@ -7541,7 +7575,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
|
||||
local_group = cpumask_test_cpu(env->dst_cpu, sched_group_cpus(sg));
|
||||
if (local_group) {
|
||||
sds->local = sg;
|
||||
sgs = &sds->local_stat;
|
||||
sgs = local;
|
||||
|
||||
if (env->idle != CPU_NEWLY_IDLE ||
|
||||
time_after_eq(jiffies, sg->sgc->next_update))
|
||||
@@ -7565,8 +7599,8 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
|
||||
* the tasks on the system).
|
||||
*/
|
||||
if (prefer_sibling && sds->local &&
|
||||
group_has_capacity(env, &sds->local_stat) &&
|
||||
(sgs->sum_nr_running > 1)) {
|
||||
group_has_capacity(env, local) &&
|
||||
(sgs->sum_nr_running > local->sum_nr_running + 1)) {
|
||||
sgs->group_no_capacity = 1;
|
||||
sgs->group_type = group_classify(sg, sgs);
|
||||
}
|
||||
@@ -8042,7 +8076,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
|
||||
struct sched_domain *sd_parent = sd->parent;
|
||||
struct sched_group *group;
|
||||
struct rq *busiest;
|
||||
unsigned long flags;
|
||||
struct rq_flags rf;
|
||||
struct cpumask *cpus = this_cpu_cpumask_var_ptr(load_balance_mask);
|
||||
|
||||
struct lb_env env = {
|
||||
@@ -8105,7 +8139,7 @@ redo:
|
||||
env.loop_max = min(sysctl_sched_nr_migrate, busiest->nr_running);
|
||||
|
||||
more_balance:
|
||||
raw_spin_lock_irqsave(&busiest->lock, flags);
|
||||
rq_lock_irqsave(busiest, &rf);
|
||||
update_rq_clock(busiest);
|
||||
|
||||
/*
|
||||
@@ -8122,14 +8156,14 @@ more_balance:
|
||||
* See task_rq_lock() family for the details.
|
||||
*/
|
||||
|
||||
raw_spin_unlock(&busiest->lock);
|
||||
rq_unlock(busiest, &rf);
|
||||
|
||||
if (cur_ld_moved) {
|
||||
attach_tasks(&env);
|
||||
ld_moved += cur_ld_moved;
|
||||
}
|
||||
|
||||
local_irq_restore(flags);
|
||||
local_irq_restore(rf.flags);
|
||||
|
||||
if (env.flags & LBF_NEED_BREAK) {
|
||||
env.flags &= ~LBF_NEED_BREAK;
|
||||
@@ -8207,6 +8241,8 @@ more_balance:
|
||||
sd->nr_balance_failed++;
|
||||
|
||||
if (need_active_balance(&env)) {
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock_irqsave(&busiest->lock, flags);
|
||||
|
||||
/* don't kick the active_load_balance_cpu_stop,
|
||||
@@ -8444,8 +8480,9 @@ static int active_load_balance_cpu_stop(void *data)
|
||||
struct rq *target_rq = cpu_rq(target_cpu);
|
||||
struct sched_domain *sd;
|
||||
struct task_struct *p = NULL;
|
||||
struct rq_flags rf;
|
||||
|
||||
raw_spin_lock_irq(&busiest_rq->lock);
|
||||
rq_lock_irq(busiest_rq, &rf);
|
||||
|
||||
/* make sure the requested cpu hasn't gone down in the meantime */
|
||||
if (unlikely(busiest_cpu != smp_processor_id() ||
|
||||
@@ -8496,7 +8533,7 @@ static int active_load_balance_cpu_stop(void *data)
|
||||
rcu_read_unlock();
|
||||
out_unlock:
|
||||
busiest_rq->active_balance = 0;
|
||||
raw_spin_unlock(&busiest_rq->lock);
|
||||
rq_unlock(busiest_rq, &rf);
|
||||
|
||||
if (p)
|
||||
attach_one_task(target_rq, p);
|
||||
@@ -8794,10 +8831,13 @@ static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
|
||||
* do the balance.
|
||||
*/
|
||||
if (time_after_eq(jiffies, rq->next_balance)) {
|
||||
raw_spin_lock_irq(&rq->lock);
|
||||
struct rq_flags rf;
|
||||
|
||||
rq_lock_irq(rq, &rf);
|
||||
update_rq_clock(rq);
|
||||
cpu_load_update_idle(rq);
|
||||
raw_spin_unlock_irq(&rq->lock);
|
||||
rq_unlock_irq(rq, &rf);
|
||||
|
||||
rebalance_domains(rq, CPU_IDLE);
|
||||
}
|
||||
|
||||
@@ -8988,8 +9028,9 @@ static void task_fork_fair(struct task_struct *p)
|
||||
struct cfs_rq *cfs_rq;
|
||||
struct sched_entity *se = &p->se, *curr;
|
||||
struct rq *rq = this_rq();
|
||||
struct rq_flags rf;
|
||||
|
||||
raw_spin_lock(&rq->lock);
|
||||
rq_lock(rq, &rf);
|
||||
update_rq_clock(rq);
|
||||
|
||||
cfs_rq = task_cfs_rq(current);
|
||||
@@ -9010,7 +9051,7 @@ static void task_fork_fair(struct task_struct *p)
|
||||
}
|
||||
|
||||
se->vruntime -= cfs_rq->min_vruntime;
|
||||
raw_spin_unlock(&rq->lock);
|
||||
rq_unlock(rq, &rf);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -9372,7 +9413,6 @@ static DEFINE_MUTEX(shares_mutex);
|
||||
int sched_group_set_shares(struct task_group *tg, unsigned long shares)
|
||||
{
|
||||
int i;
|
||||
unsigned long flags;
|
||||
|
||||
/*
|
||||
* We can't change the weight of the root cgroup.
|
||||
@@ -9389,19 +9429,17 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
|
||||
tg->shares = shares;
|
||||
for_each_possible_cpu(i) {
|
||||
struct rq *rq = cpu_rq(i);
|
||||
struct sched_entity *se;
|
||||
struct sched_entity *se = tg->se[i];
|
||||
struct rq_flags rf;
|
||||
|
||||
se = tg->se[i];
|
||||
/* Propagate contribution to hierarchy */
|
||||
raw_spin_lock_irqsave(&rq->lock, flags);
|
||||
|
||||
/* Possible calls to update_curr() need rq clock */
|
||||
rq_lock_irqsave(rq, &rf);
|
||||
update_rq_clock(rq);
|
||||
for_each_sched_entity(se) {
|
||||
update_load_avg(se, UPDATE_TG);
|
||||
update_cfs_shares(se);
|
||||
}
|
||||
raw_spin_unlock_irqrestore(&rq->lock, flags);
|
||||
rq_unlock_irqrestore(rq, &rf);
|
||||
}
|
||||
|
||||
done:
|
||||
|
@@ -56,6 +56,13 @@ SCHED_FEAT(TTWU_QUEUE, true)
|
||||
*/
|
||||
SCHED_FEAT(SIS_AVG_CPU, false)
|
||||
|
||||
/*
|
||||
* Issue a WARN when we do multiple update_rq_clock() calls
|
||||
* in a single rq->lock section. Default disabled because the
|
||||
* annotations are not complete.
|
||||
*/
|
||||
SCHED_FEAT(WARN_DOUBLE_CLOCK, false)
|
||||
|
||||
#ifdef HAVE_RT_PUSH_IPI
|
||||
/*
|
||||
* In order to avoid a thundering herd attack of CPUs that are
|
||||
|
@@ -1927,6 +1927,87 @@ static int find_next_push_cpu(struct rq *rq)
|
||||
#define RT_PUSH_IPI_EXECUTING 1
|
||||
#define RT_PUSH_IPI_RESTART 2
|
||||
|
||||
/*
|
||||
* When a high priority task schedules out from a CPU and a lower priority
|
||||
* task is scheduled in, a check is made to see if there's any RT tasks
|
||||
* on other CPUs that are waiting to run because a higher priority RT task
|
||||
* is currently running on its CPU. In this case, the CPU with multiple RT
|
||||
* tasks queued on it (overloaded) needs to be notified that a CPU has opened
|
||||
* up that may be able to run one of its non-running queued RT tasks.
|
||||
*
|
||||
* On large CPU boxes, there's the case that several CPUs could schedule
|
||||
* a lower priority task at the same time, in which case it will look for
|
||||
* any overloaded CPUs that it could pull a task from. To do this, the runqueue
|
||||
* lock must be taken from that overloaded CPU. Having 10s of CPUs all fighting
|
||||
* for a single overloaded CPU's runqueue lock can produce a large latency.
|
||||
* (This has actually been observed on large boxes running cyclictest).
|
||||
* Instead of taking the runqueue lock of the overloaded CPU, each of the
|
||||
* CPUs that scheduled a lower priority task simply sends an IPI to the
|
||||
* overloaded CPU. An IPI is much cheaper than taking an runqueue lock with
|
||||
* lots of contention. The overloaded CPU will look to push its non-running
|
||||
* RT task off, and if it does, it can then ignore the other IPIs coming
|
||||
* in, and just pass those IPIs off to any other overloaded CPU.
|
||||
*
|
||||
* When a CPU schedules a lower priority task, it only sends an IPI to
|
||||
* the "next" CPU that has overloaded RT tasks. This prevents IPI storms,
|
||||
* as having 10 CPUs scheduling lower priority tasks and 10 CPUs with
|
||||
* RT overloaded tasks, would cause 100 IPIs to go out at once.
|
||||
*
|
||||
* The overloaded RT CPU, when receiving an IPI, will try to push off its
|
||||
* overloaded RT tasks and then send an IPI to the next CPU that has
|
||||
* overloaded RT tasks. This stops when all CPUs with overloaded RT tasks
|
||||
* have completed. Just because a CPU may have pushed off its own overloaded
|
||||
* RT task does not mean it should stop sending the IPI around to other
|
||||
* overloaded CPUs. There may be another RT task waiting to run on one of
|
||||
* those CPUs that are of higher priority than the one that was just
|
||||
* pushed.
|
||||
*
|
||||
* An optimization that could possibly be made is to make a CPU array similar
|
||||
* to the cpupri array mask of all running RT tasks, but for the overloaded
|
||||
* case, then the IPI could be sent to only the CPU with the highest priority
|
||||
* RT task waiting, and that CPU could send off further IPIs to the CPU with
|
||||
* the next highest waiting task. Since the overloaded case is much less likely
|
||||
* to happen, the complexity of this implementation may not be worth it.
|
||||
* Instead, just send an IPI around to all overloaded CPUs.
|
||||
*
|
||||
* The rq->rt.push_flags holds the status of the IPI that is going around.
|
||||
* A run queue can only send out a single IPI at a time. The possible flags
|
||||
* for rq->rt.push_flags are:
|
||||
*
|
||||
* (None or zero): No IPI is going around for the current rq
|
||||
* RT_PUSH_IPI_EXECUTING: An IPI for the rq is being passed around
|
||||
* RT_PUSH_IPI_RESTART: The priority of the running task for the rq
|
||||
* has changed, and the IPI should restart
|
||||
* circulating the overloaded CPUs again.
|
||||
*
|
||||
* rq->rt.push_cpu contains the CPU that is being sent the IPI. It is updated
|
||||
* before sending to the next CPU.
|
||||
*
|
||||
* Instead of having all CPUs that schedule a lower priority task send
|
||||
* an IPI to the same "first" CPU in the RT overload mask, they send it
|
||||
* to the next overloaded CPU after their own CPU. This helps distribute
|
||||
* the work when there's more than one overloaded CPU and multiple CPUs
|
||||
* scheduling in lower priority tasks.
|
||||
*
|
||||
* When a rq schedules a lower priority task than what was currently
|
||||
* running, the next CPU with overloaded RT tasks is examined first.
|
||||
* That is, if CPU 1 and 5 are overloaded, and CPU 3 schedules a lower
|
||||
* priority task, it will send an IPI first to CPU 5, then CPU 5 will
|
||||
* send to CPU 1 if it is still overloaded. CPU 1 will clear the
|
||||
* rq->rt.push_flags if RT_PUSH_IPI_RESTART is not set.
|
||||
*
|
||||
* The first CPU to notice IPI_RESTART is set, will clear that flag and then
|
||||
* send an IPI to the next overloaded CPU after the rq->cpu and not the next
|
||||
* CPU after push_cpu. That is, if CPU 1, 4 and 5 are overloaded when CPU 3
|
||||
* schedules a lower priority task, and the IPI_RESTART gets set while the
|
||||
* handling is being done on CPU 5, it will clear the flag and send it back to
|
||||
* CPU 4 instead of CPU 1.
|
||||
*
|
||||
* Note, the above logic can be disabled by turning off the sched_feature
|
||||
* RT_PUSH_IPI. Then the rq lock of the overloaded CPU will simply be
|
||||
* taken by the CPU requesting a pull and the waiting RT task will be pulled
|
||||
* by that CPU. This may be fine for machines with few CPUs.
|
||||
*/
|
||||
static void tell_cpu_to_push(struct rq *rq)
|
||||
{
|
||||
int cpu;
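The long comment above describes the push-IPI walk in prose; below is a condensed sketch of one step of that walk. It is illustrative only: the real logic is split across tell_cpu_to_push(), try_to_push_tasks() and find_next_push_cpu() (with locking around push_flags that is omitted here), and push_ipi_step() is a made-up name.

static void push_ipi_step(struct rq *rq)
{
        /* Next RT-overloaded CPU after rq->rt.push_cpu, or >= nr_cpu_ids if none. */
        int next = find_next_push_cpu(rq);

        if (next >= nr_cpu_ids) {
                /* Wrapped around: restart once if a priority change was flagged. */
                if (rq->rt.push_flags & RT_PUSH_IPI_RESTART) {
                        rq->rt.push_flags &= ~RT_PUSH_IPI_RESTART;
                        rq->rt.push_cpu = rq->cpu;
                        next = find_next_push_cpu(rq);
                }
                if (next >= nr_cpu_ids) {
                        rq->rt.push_flags &= ~RT_PUSH_IPI_EXECUTING;
                        return;                         /* chain complete */
                }
        }

        rq->rt.push_cpu = next;
        irq_work_queue_on(&rq->rt.push_work, next);     /* pass the IPI along */
}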
kernel/sched/sched-pelt.h (new file, 13 lines)
@@ -0,0 +1,13 @@
/* Generated by Documentation/scheduler/sched-pelt; do not modify. */

static const u32 runnable_avg_yN_inv[] = {
        0xffffffff, 0xfa83b2da, 0xf5257d14, 0xefe4b99a, 0xeac0c6e6, 0xe5b906e6,
        0xe0ccdeeb, 0xdbfbb796, 0xd744fcc9, 0xd2a81d91, 0xce248c14, 0xc9b9bd85,
        0xc5672a10, 0xc12c4cc9, 0xbd08a39e, 0xb8fbaf46, 0xb504f333, 0xb123f581,
        0xad583ee9, 0xa9a15ab4, 0xa5fed6a9, 0xa2704302, 0x9ef5325f, 0x9b8d39b9,
        0x9837f050, 0x94f4efa8, 0x91c3d373, 0x8ea4398a, 0x8b95c1e3, 0x88980e80,
        0x85aac367, 0x82cd8698,
};

#define LOAD_AVG_PERIOD 32
#define LOAD_AVG_MAX 47742
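The header above is produced by the 'sched-pelt' tool added in this series (see the "sched/Documentation: Add 'sched-pelt' tool" entry in the shortlog). A minimal stand-alone sketch of how such a table can be regenerated from the single constraint y^LOAD_AVG_PERIOD == 0.5 follows; this is not the in-tree tool, and the last digits can differ slightly depending on rounding. Build with: cc -O2 pelt.c -lm.

#include <math.h>
#include <stdio.h>

#define HALFLIFE 32                     /* LOAD_AVG_PERIOD */

int main(void)
{
        double y = pow(0.5, 1.0 / HALFLIFE);                 /* y^32 == 0.5 */
        unsigned long long y_inv = (double)0xffffffffu * y;  /* y in 0.32 fixed point */
        long long max = 1024, last = 0;
        int i;

        printf("static const u32 runnable_avg_yN_inv[] = {\n\t");
        for (i = 0; i < HALFLIFE; i++)
                printf("0x%8x,%s",
                       (unsigned int)((double)0xffffffffu * pow(y, i)),
                       (i % 6 == 5) ? "\n\t" : " ");
        printf("\n};\n");

        /* Iterate max = max*y + 1024 in fixed point until it converges. */
        while (last != max) {
                last = max;
                max = ((max * y_inv) >> 32) + 1024;
        }
        printf("#define LOAD_AVG_PERIOD %d\n#define LOAD_AVG_MAX %lld\n",
               HALFLIFE, max);
        return 0;
}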
@@ -1331,15 +1331,17 @@ extern const u32 sched_prio_to_wmult[40];
|
||||
#define DEQUEUE_SLEEP 0x01
|
||||
#define DEQUEUE_SAVE 0x02 /* matches ENQUEUE_RESTORE */
|
||||
#define DEQUEUE_MOVE 0x04 /* matches ENQUEUE_MOVE */
|
||||
#define DEQUEUE_NOCLOCK 0x08 /* matches ENQUEUE_NOCLOCK */
|
||||
|
||||
#define ENQUEUE_WAKEUP 0x01
|
||||
#define ENQUEUE_RESTORE 0x02
|
||||
#define ENQUEUE_MOVE 0x04
|
||||
#define ENQUEUE_NOCLOCK 0x08
|
||||
|
||||
#define ENQUEUE_HEAD 0x08
|
||||
#define ENQUEUE_REPLENISH 0x10
|
||||
#define ENQUEUE_HEAD 0x10
|
||||
#define ENQUEUE_REPLENISH 0x20
|
||||
#ifdef CONFIG_SMP
|
||||
#define ENQUEUE_MIGRATED 0x20
|
||||
#define ENQUEUE_MIGRATED 0x40
|
||||
#else
|
||||
#define ENQUEUE_MIGRATED 0x00
|
||||
#endif
|
||||
@@ -1624,6 +1626,7 @@ static inline void sched_avg_update(struct rq *rq) { }

struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf)
        __acquires(rq->lock);

struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
        __acquires(p->pi_lock)
        __acquires(rq->lock);

@@ -1645,6 +1648,62 @@ task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
        raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
}

static inline void
rq_lock_irqsave(struct rq *rq, struct rq_flags *rf)
        __acquires(rq->lock)
{
        raw_spin_lock_irqsave(&rq->lock, rf->flags);
        rq_pin_lock(rq, rf);
}

static inline void
rq_lock_irq(struct rq *rq, struct rq_flags *rf)
        __acquires(rq->lock)
{
        raw_spin_lock_irq(&rq->lock);
        rq_pin_lock(rq, rf);
}

static inline void
rq_lock(struct rq *rq, struct rq_flags *rf)
        __acquires(rq->lock)
{
        raw_spin_lock(&rq->lock);
        rq_pin_lock(rq, rf);
}

static inline void
rq_relock(struct rq *rq, struct rq_flags *rf)
        __acquires(rq->lock)
{
        raw_spin_lock(&rq->lock);
        rq_repin_lock(rq, rf);
}

static inline void
rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf)
        __releases(rq->lock)
{
        rq_unpin_lock(rq, rf);
        raw_spin_unlock_irqrestore(&rq->lock, rf->flags);
}

static inline void
rq_unlock_irq(struct rq *rq, struct rq_flags *rf)
        __releases(rq->lock)
{
        rq_unpin_lock(rq, rf);
        raw_spin_unlock_irq(&rq->lock);
}

static inline void
rq_unlock(struct rq *rq, struct rq_flags *rf)
        __releases(rq->lock)
{
        rq_unpin_lock(rq, rf);
        raw_spin_unlock(&rq->lock);
}
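For reference, this is the conversion pattern the helpers above enable throughout the series: rq_lock() and friends replace open-coded raw_spin_lock() + rq_pin_lock() pairs. A hedged illustration; example_poke_rq() is a made-up caller, not a function in the tree:

static void example_poke_rq(struct rq *rq)
{
        struct rq_flags rf;

        /* was: raw_spin_lock_irqsave(&rq->lock, flags); rq_pin_lock(rq, &rf); */
        rq_lock_irqsave(rq, &rf);
        update_rq_clock(rq);            /* update the clock once, under the pin */

        /* ... work on @rq; callees can rely on an up-to-date, pinned clock ... */

        /* was: rq_unpin_lock(rq, &rf); raw_spin_unlock_irqrestore(&rq->lock, flags); */
        rq_unlock_irqrestore(rq, &rf);
}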

#ifdef CONFIG_SMP
#ifdef CONFIG_PREEMPT

@@ -309,7 +309,7 @@ restart:
        account_irq_exit_time(current);
        __local_bh_enable(SOFTIRQ_OFFSET);
        WARN_ON_ONCE(in_interrupt());
        tsk_restore_flags(current, old_flags, PF_MEMALLOC);
        current_restore_flags(old_flags, PF_MEMALLOC);
}

asmlinkage __visible void do_softirq(void)

@@ -4734,6 +4734,29 @@ long work_on_cpu(int cpu, long (*fn)(void *), void *arg)
        return wfc.ret;
}
EXPORT_SYMBOL_GPL(work_on_cpu);

/**
 * work_on_cpu_safe - run a function in thread context on a particular cpu
 * @cpu: the cpu to run on
 * @fn: the function to run
 * @arg: the function argument
 *
 * Disables CPU hotplug and calls work_on_cpu(). The caller must not hold
 * any locks which would prevent @fn from completing.
 *
 * Return: The value @fn returns.
 */
long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg)
{
        long ret = -ENODEV;

        get_online_cpus();
        if (cpu_online(cpu))
                ret = work_on_cpu(cpu, fn, arg);
        put_online_cpus();
        return ret;
}
EXPORT_SYMBOL_GPL(work_on_cpu_safe);
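The "Replace racy task affinity logic" patches in this pull switch callers from temporarily rewriting current's cpus_allowed mask to this helper. A hedged usage sketch; read_remote_value() and query_cpu() are made-up names, not code from any of the converted drivers:

static long read_remote_value(void *arg)
{
        /* Runs in a kworker bound to the requested CPU. */
        return smp_processor_id() + *(long *)arg;
}

static long query_cpu(int cpu)
{
        long bias = 42;

        /* No cpumask juggling needed; CPU hotplug is held off inside. */
        return work_on_cpu_safe(cpu, read_remote_value, &bias);
}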
#endif /* CONFIG_SMP */

#ifdef CONFIG_FREEZER