Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (26 commits)
  sched: Resched proper CPU on yield_to()
  sched: Allow users with sufficient RLIMIT_NICE to change from SCHED_IDLE policy
  sched: Allow SCHED_BATCH to preempt SCHED_IDLE tasks
  sched: Clean up the IRQ_TIME_ACCOUNTING code
  sched: Add #ifdef around irq time accounting functions
  sched, autogroup: Stop claiming ownership of the root task group
  sched, autogroup: Stop going ahead if autogroup is disabled
  sched, autogroup, sysctl: Use proc_dointvec_minmax() instead
  sched: Fix the group_imb logic
  sched: Clean up some f_b_g() comments
  sched: Clean up remnants of sd_idle
  sched: Wholesale removal of sd_idle logic
  sched: Add yield_to(task, preempt) functionality
  sched: Use a buddy to implement yield_task_fair()
  sched: Limit the scope of clear_buddies
  sched: Check the right ->nr_running in yield_task_fair()
  sched: Avoid expensive initial update_cfs_load(), on UP too
  sched: Fix switch_from_fair()
  sched: Simplify the idle scheduling class
  softirqs: Account ksoftirqd time as cpustat softirq
  ...
This commit is contained in:
296
kernel/sched.c
296
kernel/sched.c
@@ -324,7 +324,7 @@ struct cfs_rq {
|
||||
* 'curr' points to currently running entity on this cfs_rq.
|
||||
* It is set to NULL otherwise (i.e when none are currently running).
|
||||
*/
|
||||
struct sched_entity *curr, *next, *last;
|
||||
struct sched_entity *curr, *next, *last, *skip;
|
||||
|
||||
unsigned int nr_spread_over;
|
||||
|
||||
@@ -1683,6 +1683,39 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
|
||||
__release(rq2->lock);
|
||||
}
|
||||
|
||||
#else /* CONFIG_SMP */
|
||||
|
||||
/*
|
||||
* double_rq_lock - safely lock two runqueues
|
||||
*
|
||||
* Note this does not disable interrupts like task_rq_lock,
|
||||
* you need to do so manually before calling.
|
||||
*/
|
||||
static void double_rq_lock(struct rq *rq1, struct rq *rq2)
|
||||
__acquires(rq1->lock)
|
||||
__acquires(rq2->lock)
|
||||
{
|
||||
BUG_ON(!irqs_disabled());
|
||||
BUG_ON(rq1 != rq2);
|
||||
raw_spin_lock(&rq1->lock);
|
||||
__acquire(rq2->lock); /* Fake it out ;) */
|
||||
}
|
||||
|
||||
/*
|
||||
* double_rq_unlock - safely unlock two runqueues
|
||||
*
|
||||
* Note this does not restore interrupts like task_rq_unlock,
|
||||
* you need to do so manually after calling.
|
||||
*/
|
||||
static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
|
||||
__releases(rq1->lock)
|
||||
__releases(rq2->lock)
|
||||
{
|
||||
BUG_ON(rq1 != rq2);
|
||||
raw_spin_unlock(&rq1->lock);
|
||||
__release(rq2->lock);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static void calc_load_account_idle(struct rq *this_rq);
|
||||
@@ -1877,7 +1910,7 @@ void account_system_vtime(struct task_struct *curr)
|
||||
*/
|
||||
if (hardirq_count())
|
||||
__this_cpu_add(cpu_hardirq_time, delta);
|
||||
else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD))
|
||||
else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
|
||||
__this_cpu_add(cpu_softirq_time, delta);
|
||||
|
||||
irq_time_write_end();
|
||||
@@ -1917,8 +1950,40 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
|
||||
sched_rt_avg_update(rq, irq_delta);
|
||||
}
|
||||
|
||||
static int irqtime_account_hi_update(void)
|
||||
{
|
||||
struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
|
||||
unsigned long flags;
|
||||
u64 latest_ns;
|
||||
int ret = 0;
|
||||
|
||||
local_irq_save(flags);
|
||||
latest_ns = this_cpu_read(cpu_hardirq_time);
|
||||
if (cputime64_gt(nsecs_to_cputime64(latest_ns), cpustat->irq))
|
||||
ret = 1;
|
||||
local_irq_restore(flags);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int irqtime_account_si_update(void)
|
||||
{
|
||||
struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
|
||||
unsigned long flags;
|
||||
u64 latest_ns;
|
||||
int ret = 0;
|
||||
|
||||
local_irq_save(flags);
|
||||
latest_ns = this_cpu_read(cpu_softirq_time);
|
||||
if (cputime64_gt(nsecs_to_cputime64(latest_ns), cpustat->softirq))
|
||||
ret = 1;
|
||||
local_irq_restore(flags);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#else /* CONFIG_IRQ_TIME_ACCOUNTING */
|
||||
|
||||
#define sched_clock_irqtime (0)
|
||||
|
||||
static void update_rq_clock_task(struct rq *rq, s64 delta)
|
||||
{
|
||||
rq->clock_task += delta;
|
||||
@@ -2022,14 +2087,14 @@ inline int task_curr(const struct task_struct *p)
|
||||
|
||||
static inline void check_class_changed(struct rq *rq, struct task_struct *p,
|
||||
const struct sched_class *prev_class,
|
||||
int oldprio, int running)
|
||||
int oldprio)
|
||||
{
|
||||
if (prev_class != p->sched_class) {
|
||||
if (prev_class->switched_from)
|
||||
prev_class->switched_from(rq, p, running);
|
||||
p->sched_class->switched_to(rq, p, running);
|
||||
} else
|
||||
p->sched_class->prio_changed(rq, p, oldprio, running);
|
||||
prev_class->switched_from(rq, p);
|
||||
p->sched_class->switched_to(rq, p);
|
||||
} else if (oldprio != p->prio)
|
||||
p->sched_class->prio_changed(rq, p, oldprio);
|
||||
}
|
||||
|
||||
static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
|
||||
@@ -2542,6 +2607,7 @@ static void __sched_fork(struct task_struct *p)
|
||||
p->se.sum_exec_runtime = 0;
|
||||
p->se.prev_sum_exec_runtime = 0;
|
||||
p->se.nr_migrations = 0;
|
||||
p->se.vruntime = 0;
|
||||
|
||||
#ifdef CONFIG_SCHEDSTATS
|
||||
memset(&p->se.statistics, 0, sizeof(p->se.statistics));
|
||||
@@ -3546,6 +3612,32 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Account system cpu time to a process and desired cpustat field
|
||||
* @p: the process that the cpu time gets accounted to
|
||||
* @cputime: the cpu time spent in kernel space since the last update
|
||||
* @cputime_scaled: cputime scaled by cpu frequency
|
||||
* @target_cputime64: pointer to cpustat field that has to be updated
|
||||
*/
|
||||
static inline
|
||||
void __account_system_time(struct task_struct *p, cputime_t cputime,
|
||||
cputime_t cputime_scaled, cputime64_t *target_cputime64)
|
||||
{
|
||||
cputime64_t tmp = cputime_to_cputime64(cputime);
|
||||
|
||||
/* Add system time to process. */
|
||||
p->stime = cputime_add(p->stime, cputime);
|
||||
p->stimescaled = cputime_add(p->stimescaled, cputime_scaled);
|
||||
account_group_system_time(p, cputime);
|
||||
|
||||
/* Add system time to cpustat. */
|
||||
*target_cputime64 = cputime64_add(*target_cputime64, tmp);
|
||||
cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime);
|
||||
|
||||
/* Account for system time used */
|
||||
acct_update_integrals(p);
|
||||
}
|
||||
|
||||
/*
|
||||
* Account system cpu time to a process.
|
||||
* @p: the process that the cpu time gets accounted to
|
||||
@@ -3557,36 +3649,26 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
|
||||
cputime_t cputime, cputime_t cputime_scaled)
|
||||
{
|
||||
struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
|
||||
cputime64_t tmp;
|
||||
cputime64_t *target_cputime64;
|
||||
|
||||
if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
|
||||
account_guest_time(p, cputime, cputime_scaled);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Add system time to process. */
|
||||
p->stime = cputime_add(p->stime, cputime);
|
||||
p->stimescaled = cputime_add(p->stimescaled, cputime_scaled);
|
||||
account_group_system_time(p, cputime);
|
||||
|
||||
/* Add system time to cpustat. */
|
||||
tmp = cputime_to_cputime64(cputime);
|
||||
if (hardirq_count() - hardirq_offset)
|
||||
cpustat->irq = cputime64_add(cpustat->irq, tmp);
|
||||
target_cputime64 = &cpustat->irq;
|
||||
else if (in_serving_softirq())
|
||||
cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
|
||||
target_cputime64 = &cpustat->softirq;
|
||||
else
|
||||
cpustat->system = cputime64_add(cpustat->system, tmp);
|
||||
target_cputime64 = &cpustat->system;
|
||||
|
||||
cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime);
|
||||
|
||||
/* Account for system time used */
|
||||
acct_update_integrals(p);
|
||||
__account_system_time(p, cputime, cputime_scaled, target_cputime64);
|
||||
}
|
||||
|
||||
/*
|
||||
* Account for involuntary wait time.
|
||||
* @steal: the cpu time spent in involuntary wait
|
||||
* @cputime: the cpu time spent in involuntary wait
|
||||
*/
|
||||
void account_steal_time(cputime_t cputime)
|
||||
{
|
||||
@@ -3614,6 +3696,73 @@ void account_idle_time(cputime_t cputime)
|
||||
|
||||
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
|
||||
|
||||
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
|
||||
/*
|
||||
* Account a tick to a process and cpustat
|
||||
* @p: the process that the cpu time gets accounted to
|
||||
* @user_tick: is the tick from userspace
|
||||
* @rq: the pointer to rq
|
||||
*
|
||||
* Tick demultiplexing follows the order
|
||||
* - pending hardirq update
|
||||
* - pending softirq update
|
||||
* - user_time
|
||||
* - idle_time
|
||||
* - system time
|
||||
* - check for guest_time
|
||||
* - else account as system_time
|
||||
*
|
||||
* Check for hardirq is done both for system and user time as there is
|
||||
* no timer going off while we are on hardirq and hence we may never get an
|
||||
* opportunity to update it solely in system time.
|
||||
* p->stime and friends are only updated on system time and not on irq
|
||||
* softirq as those do not count in task exec_runtime any more.
|
||||
*/
|
||||
static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
|
||||
struct rq *rq)
|
||||
{
|
||||
cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
|
||||
cputime64_t tmp = cputime_to_cputime64(cputime_one_jiffy);
|
||||
struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
|
||||
|
||||
if (irqtime_account_hi_update()) {
|
||||
cpustat->irq = cputime64_add(cpustat->irq, tmp);
|
||||
} else if (irqtime_account_si_update()) {
|
||||
cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
|
||||
} else if (this_cpu_ksoftirqd() == p) {
|
||||
/*
|
||||
* ksoftirqd time do not get accounted in cpu_softirq_time.
|
||||
* So, we have to handle it separately here.
|
||||
* Also, p->stime needs to be updated for ksoftirqd.
|
||||
*/
|
||||
__account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
|
||||
&cpustat->softirq);
|
||||
} else if (user_tick) {
|
||||
account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
|
||||
} else if (p == rq->idle) {
|
||||
account_idle_time(cputime_one_jiffy);
|
||||
} else if (p->flags & PF_VCPU) { /* System time or guest time */
|
||||
account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled);
|
||||
} else {
|
||||
__account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
|
||||
&cpustat->system);
|
||||
}
|
||||
}
|
||||
|
||||
static void irqtime_account_idle_ticks(int ticks)
|
||||
{
|
||||
int i;
|
||||
struct rq *rq = this_rq();
|
||||
|
||||
for (i = 0; i < ticks; i++)
|
||||
irqtime_account_process_tick(current, 0, rq);
|
||||
}
|
||||
#else /* CONFIG_IRQ_TIME_ACCOUNTING */
|
||||
/* Empty stand-ins used when CONFIG_IRQ_TIME_ACCOUNTING is not set. */
static void irqtime_account_idle_ticks(int ticks)
{
}

static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
						struct rq *rq)
{
}
|
||||
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
|
||||
|
||||
/*
|
||||
* Account a single tick of cpu time.
|
||||
* @p: the process that the cpu time gets accounted to
|
||||
@@ -3624,6 +3773,11 @@ void account_process_tick(struct task_struct *p, int user_tick)
|
||||
cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
|
||||
struct rq *rq = this_rq();
|
||||
|
||||
if (sched_clock_irqtime) {
|
||||
irqtime_account_process_tick(p, user_tick, rq);
|
||||
return;
|
||||
}
|
||||
|
||||
if (user_tick)
|
||||
account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
|
||||
else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
|
||||
@@ -3649,6 +3803,12 @@ void account_steal_ticks(unsigned long ticks)
|
||||
*/
|
||||
void account_idle_ticks(unsigned long ticks)
|
||||
{
|
||||
|
||||
if (sched_clock_irqtime) {
|
||||
irqtime_account_idle_ticks(ticks);
|
||||
return;
|
||||
}
|
||||
|
||||
account_idle_time(jiffies_to_cputime(ticks));
|
||||
}
|
||||
|
||||
@@ -4547,11 +4707,10 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
|
||||
|
||||
if (running)
|
||||
p->sched_class->set_curr_task(rq);
|
||||
if (on_rq) {
|
||||
if (on_rq)
|
||||
enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0);
|
||||
|
||||
check_class_changed(rq, p, prev_class, oldprio, running);
|
||||
}
|
||||
check_class_changed(rq, p, prev_class, oldprio);
|
||||
task_rq_unlock(rq, &flags);
|
||||
}
|
||||
|
||||
@@ -4799,12 +4958,15 @@ recheck:
|
||||
param->sched_priority > rlim_rtprio)
|
||||
return -EPERM;
|
||||
}
|
||||
|
||||
/*
|
||||
* Like positive nice levels, dont allow tasks to
|
||||
* move out of SCHED_IDLE either:
|
||||
* Treat SCHED_IDLE as nice 20. Only allow a switch to
|
||||
* SCHED_NORMAL if the RLIMIT_NICE would normally permit it.
|
||||
*/
|
||||
if (p->policy == SCHED_IDLE && policy != SCHED_IDLE)
|
||||
return -EPERM;
|
||||
if (p->policy == SCHED_IDLE && policy != SCHED_IDLE) {
|
||||
if (!can_nice(p, TASK_NICE(p)))
|
||||
return -EPERM;
|
||||
}
|
||||
|
||||
/* can't change other user's priorities */
|
||||
if (!check_same_owner(p))
|
||||
@@ -4879,11 +5041,10 @@ recheck:
|
||||
|
||||
if (running)
|
||||
p->sched_class->set_curr_task(rq);
|
||||
if (on_rq) {
|
||||
if (on_rq)
|
||||
activate_task(rq, p, 0);
|
||||
|
||||
check_class_changed(rq, p, prev_class, oldprio, running);
|
||||
}
|
||||
check_class_changed(rq, p, prev_class, oldprio);
|
||||
__task_rq_unlock(rq);
|
||||
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
|
||||
|
||||
@@ -5300,6 +5461,65 @@ void __sched yield(void)
|
||||
}
|
||||
EXPORT_SYMBOL(yield);
|
||||
|
||||
/**
|
||||
* yield_to - yield the current processor to another thread in
|
||||
* your thread group, or accelerate that thread toward the
|
||||
* processor it's on.
|
||||
*
|
||||
* It's the caller's job to ensure that the target task struct
|
||||
* can't go away on us before we can do any checks.
|
||||
*
|
||||
* Returns true if we indeed boosted the target task.
|
||||
*/
|
||||
bool __sched yield_to(struct task_struct *p, bool preempt)
|
||||
{
|
||||
struct task_struct *curr = current;
|
||||
struct rq *rq, *p_rq;
|
||||
unsigned long flags;
|
||||
bool yielded = 0;
|
||||
|
||||
local_irq_save(flags);
|
||||
rq = this_rq();
|
||||
|
||||
again:
|
||||
p_rq = task_rq(p);
|
||||
double_rq_lock(rq, p_rq);
|
||||
while (task_rq(p) != p_rq) {
|
||||
double_rq_unlock(rq, p_rq);
|
||||
goto again;
|
||||
}
|
||||
|
||||
if (!curr->sched_class->yield_to_task)
|
||||
goto out;
|
||||
|
||||
if (curr->sched_class != p->sched_class)
|
||||
goto out;
|
||||
|
||||
if (task_running(p_rq, p) || p->state)
|
||||
goto out;
|
||||
|
||||
yielded = curr->sched_class->yield_to_task(rq, p, preempt);
|
||||
if (yielded) {
|
||||
schedstat_inc(rq, yld_count);
|
||||
/*
|
||||
* Make p's CPU reschedule; pick_next_entity takes care of
|
||||
* fairness.
|
||||
*/
|
||||
if (preempt && rq != p_rq)
|
||||
resched_task(p_rq->curr);
|
||||
}
|
||||
|
||||
out:
|
||||
double_rq_unlock(rq, p_rq);
|
||||
local_irq_restore(flags);
|
||||
|
||||
if (yielded)
|
||||
schedule();
|
||||
|
||||
return yielded;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(yield_to);
|
||||
|
||||
/*
|
||||
* This task is about to go to sleep on IO. Increment rq->nr_iowait so
|
||||
* that process accounting knows that this is a task in IO wait state.
|
||||
@@ -7773,6 +7993,10 @@ static void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq)
|
||||
INIT_LIST_HEAD(&cfs_rq->tasks);
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
cfs_rq->rq = rq;
|
||||
/* allow initial update_cfs_load() to truncate */
|
||||
#ifdef CONFIG_SMP
|
||||
cfs_rq->load_stamp = 1;
|
||||
#endif
|
||||
#endif
|
||||
cfs_rq->min_vruntime = (u64)(-(1LL << 20));
|
||||
}
|
||||
@@ -8086,6 +8310,8 @@ EXPORT_SYMBOL(__might_sleep);
|
||||
#ifdef CONFIG_MAGIC_SYSRQ
|
||||
static void normalize_task(struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
const struct sched_class *prev_class = p->sched_class;
|
||||
int old_prio = p->prio;
|
||||
int on_rq;
|
||||
|
||||
on_rq = p->se.on_rq;
|
||||
@@ -8096,6 +8322,8 @@ static void normalize_task(struct rq *rq, struct task_struct *p)
|
||||
activate_task(rq, p, 0);
|
||||
resched_task(rq->curr);
|
||||
}
|
||||
|
||||
check_class_changed(rq, p, prev_class, old_prio);
|
||||
}
|
||||
|
||||
void normalize_rt_tasks(void)
|
||||
@@ -8487,7 +8715,7 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
|
||||
/* Propagate contribution to hierarchy */
|
||||
raw_spin_lock_irqsave(&rq->lock, flags);
|
||||
for_each_sched_entity(se)
|
||||
update_cfs_shares(group_cfs_rq(se), 0);
|
||||
update_cfs_shares(group_cfs_rq(se));
|
||||
raw_spin_unlock_irqrestore(&rq->lock, flags);
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user