Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler changes from Ingo Molnar:
 "Continued quest to clean up and enhance the cputime code by Frederic
  Weisbecker, in preparation for future tickless kernel features.

  Other than that, smallish changes."

Fix up trivial conflicts due to additions next to each other in arch/{x86/}Kconfig

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits)
  cputime: Make finegrained irqtime accounting generally available
  cputime: Gather time/stats accounting config options into a single menu
  ia64: Reuse system and user vtime accounting functions on task switch
  ia64: Consolidate user vtime accounting
  vtime: Consolidate system/idle context detection
  cputime: Use a proper subsystem naming for vtime related APIs
  sched: cpu_power: enable ARCH_POWER
  sched/nohz: Clean up select_nohz_load_balancer()
  sched: Fix load avg vs. cpu-hotplug
  sched: Remove __ARCH_WANT_INTERRUPTS_ON_CTXSW
  sched: Fix nohz_idle_balance()
  sched: Remove useless code in yield_to()
  sched: Add time unit suffix to sched sysctl knobs
  sched/debug: Limit sd->*_idx range on sysctl
  sched: Remove AFFINE_WAKEUPS feature flag
  s390: Remove leftover account_tick_vtime() header
  cputime: Consolidate vtime handling on context switch
  sched: Move cputime code to its own file
  cputime: Generalize CONFIG_VIRT_CPU_ACCOUNTING
  tile: Remove SD_PREFER_LOCAL leftover
  ...
This commit is contained in:
Linus Torvalds
2012-10-01 10:43:39 -07:00
32 changed files with 903 additions and 917 deletions

View File

@@ -597,7 +597,7 @@ calc_delta_fair(unsigned long delta, struct sched_entity *se)
/*
* The idea is to set a period in which each task runs once.
*
* When there are too many tasks (sysctl_sched_nr_latency) we have to stretch
* When there are too many tasks (sched_nr_latency) we have to stretch
* this period because otherwise the slices get too small.
*
* p = (nr <= nl) ? l : l*nr/nl
@@ -2700,7 +2700,6 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
int prev_cpu = task_cpu(p);
int new_cpu = cpu;
int want_affine = 0;
int want_sd = 1;
int sync = wake_flags & WF_SYNC;
if (p->nr_cpus_allowed == 1)
@@ -2717,27 +2716,6 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
if (!(tmp->flags & SD_LOAD_BALANCE))
continue;
/*
* If power savings logic is enabled for a domain, see if we
* are not overloaded, if so, don't balance wider.
*/
if (tmp->flags & (SD_PREFER_LOCAL)) {
unsigned long power = 0;
unsigned long nr_running = 0;
unsigned long capacity;
int i;
for_each_cpu(i, sched_domain_span(tmp)) {
power += power_of(i);
nr_running += cpu_rq(i)->cfs.nr_running;
}
capacity = DIV_ROUND_CLOSEST(power, SCHED_POWER_SCALE);
if (nr_running < capacity)
want_sd = 0;
}
/*
* If both cpu and prev_cpu are part of this domain,
* cpu is a valid SD_WAKE_AFFINE target.
@@ -2745,21 +2723,15 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
affine_sd = tmp;
want_affine = 0;
break;
}
if (!want_sd && !want_affine)
break;
if (!(tmp->flags & sd_flag))
continue;
if (want_sd)
if (tmp->flags & sd_flag)
sd = tmp;
}
if (affine_sd) {
if (cpu == prev_cpu || wake_affine(affine_sd, p, sync))
if (cpu != prev_cpu && wake_affine(affine_sd, p, sync))
prev_cpu = cpu;
new_cpu = select_idle_sibling(p, prev_cpu);
@@ -4295,7 +4267,7 @@ redo:
goto out_balanced;
}
BUG_ON(busiest == this_rq);
BUG_ON(busiest == env.dst_rq);
schedstat_add(sd, lb_imbalance[idle], env.imbalance);
@@ -4316,7 +4288,7 @@ redo:
update_h_load(env.src_cpu);
more_balance:
local_irq_save(flags);
double_rq_lock(this_rq, busiest);
double_rq_lock(env.dst_rq, busiest);
/*
* cur_ld_moved - load moved in current iteration
@@ -4324,7 +4296,7 @@ more_balance:
*/
cur_ld_moved = move_tasks(&env);
ld_moved += cur_ld_moved;
double_rq_unlock(this_rq, busiest);
double_rq_unlock(env.dst_rq, busiest);
local_irq_restore(flags);
if (env.flags & LBF_NEED_BREAK) {
@@ -4360,8 +4332,7 @@ more_balance:
if ((env.flags & LBF_SOME_PINNED) && env.imbalance > 0 &&
lb_iterations++ < max_lb_iterations) {
this_rq = cpu_rq(env.new_dst_cpu);
env.dst_rq = this_rq;
env.dst_rq = cpu_rq(env.new_dst_cpu);
env.dst_cpu = env.new_dst_cpu;
env.flags &= ~LBF_SOME_PINNED;
env.loop = 0;
@@ -4646,7 +4617,7 @@ static void nohz_balancer_kick(int cpu)
return;
}
static inline void clear_nohz_tick_stopped(int cpu)
static inline void nohz_balance_exit_idle(int cpu)
{
if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) {
cpumask_clear_cpu(cpu, nohz.idle_cpus_mask);
@@ -4686,28 +4657,23 @@ void set_cpu_sd_state_idle(void)
}
/*
* This routine will record that this cpu is going idle with tick stopped.
* This routine will record that the cpu is going idle with tick stopped.
* This info will be used in performing idle load balancing in the future.
*/
void select_nohz_load_balancer(int stop_tick)
void nohz_balance_enter_idle(int cpu)
{
int cpu = smp_processor_id();
/*
* If this cpu is going down, then nothing needs to be done.
*/
if (!cpu_active(cpu))
return;
if (stop_tick) {
if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))
return;
if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))
return;
cpumask_set_cpu(cpu, nohz.idle_cpus_mask);
atomic_inc(&nohz.nr_cpus);
set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
}
return;
cpumask_set_cpu(cpu, nohz.idle_cpus_mask);
atomic_inc(&nohz.nr_cpus);
set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
}
static int __cpuinit sched_ilb_notifier(struct notifier_block *nfb,
@@ -4715,7 +4681,7 @@ static int __cpuinit sched_ilb_notifier(struct notifier_block *nfb,
{
switch (action & ~CPU_TASKS_FROZEN) {
case CPU_DYING:
clear_nohz_tick_stopped(smp_processor_id());
nohz_balance_exit_idle(smp_processor_id());
return NOTIFY_OK;
default:
return NOTIFY_DONE;
@@ -4837,14 +4803,15 @@ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle)
if (need_resched())
break;
raw_spin_lock_irq(&this_rq->lock);
update_rq_clock(this_rq);
update_idle_cpu_load(this_rq);
raw_spin_unlock_irq(&this_rq->lock);
rq = cpu_rq(balance_cpu);
raw_spin_lock_irq(&rq->lock);
update_rq_clock(rq);
update_idle_cpu_load(rq);
raw_spin_unlock_irq(&rq->lock);
rebalance_domains(balance_cpu, CPU_IDLE);
rq = cpu_rq(balance_cpu);
if (time_after(this_rq->next_balance, rq->next_balance))
this_rq->next_balance = rq->next_balance;
}
@@ -4875,7 +4842,7 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
* busy tick after returning from idle, we will update the busy stats.
*/
set_cpu_sd_state_busy();
clear_nohz_tick_stopped(cpu);
nohz_balance_exit_idle(cpu);
/*
* None are in tickless mode and hence no need for NOHZ idle load