Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler changes from Ingo Molnar:
 "The biggest change is the cleanup/simplification of the load-balancer:
  instead of the current practice of architectures twiddling scheduler
  internal data structures and providing the scheduler domains in
  colorfully inconsistent ways, we now have generic scheduler code in
  kernel/sched/core.c:sched_init_numa() that looks at the architecture's
  node_distance() parameters and (while not fully trusting it) deducts a
  NUMA topology from it.

  This inevitably changes balancing behavior - hopefully for the better.

  There are various smaller optimizations, cleanups and fixlets as well"

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched: Taint kernel with TAINT_WARN after sleep-in-atomic bug
  sched: Remove stale power aware scheduling remnants and dysfunctional knobs
  sched/debug: Fix printing large integers on 32-bit platforms
  sched/fair: Improve the ->group_imb logic
  sched/nohz: Fix rq->cpu_load[] calculations
  sched/numa: Don't scale the imbalance
  sched/fair: Revert sched-domain iteration breakage
  sched/x86: Rewrite set_cpu_sibling_map()
  sched/numa: Fix the new NUMA topology bits
  sched/numa: Rewrite the CONFIG_NUMA sched domain support
  sched/fair: Propagate 'struct lb_env' usage into find_busiest_group
  sched/fair: Add some serialization to the sched_domain load-balance walk
  sched/fair: Let minimally loaded cpu balance the group
  sched: Change rq->nr_running to unsigned int
  x86/numa: Check for nonsensical topologies on real hw as well
  x86/numa: Hard partition cpu topology masks on node boundaries
  x86/numa: Allow specifying node_distance() for numa=fake
  x86/sched: Make mwait_usable() heed to "idle=" kernel parameters properly
  sched: Update documentation and comments
  sched_rt: Avoid unnecessary dequeue and enqueue of pushable tasks in set_cpus_allowed_rt()
This commit is contained in:
Linus Torvalds
2012-05-22 18:27:32 -07:00
25 changed files with 441 additions and 1005 deletions

View File

@@ -299,59 +299,90 @@ void __cpuinit smp_store_cpu_info(int id)
identify_secondary_cpu(c);
}
static void __cpuinit link_thread_siblings(int cpu1, int cpu2)
static bool __cpuinit
topology_sane(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o, const char *name)
{
cpumask_set_cpu(cpu1, cpu_sibling_mask(cpu2));
cpumask_set_cpu(cpu2, cpu_sibling_mask(cpu1));
cpumask_set_cpu(cpu1, cpu_core_mask(cpu2));
cpumask_set_cpu(cpu2, cpu_core_mask(cpu1));
cpumask_set_cpu(cpu1, cpu_llc_shared_mask(cpu2));
cpumask_set_cpu(cpu2, cpu_llc_shared_mask(cpu1));
int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
return !WARN_ONCE(cpu_to_node(cpu1) != cpu_to_node(cpu2),
"sched: CPU #%d's %s-sibling CPU #%d is not on the same node! "
"[node: %d != %d]. Ignoring dependency.\n",
cpu1, name, cpu2, cpu_to_node(cpu1), cpu_to_node(cpu2));
}
#define link_mask(_m, c1, c2) \
do { \
cpumask_set_cpu((c1), cpu_##_m##_mask(c2)); \
cpumask_set_cpu((c2), cpu_##_m##_mask(c1)); \
} while (0)
static bool __cpuinit match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
if (c->phys_proc_id == o->phys_proc_id &&
per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2) &&
c->compute_unit_id == o->compute_unit_id)
return topology_sane(c, o, "smt");
} else if (c->phys_proc_id == o->phys_proc_id &&
c->cpu_core_id == o->cpu_core_id) {
return topology_sane(c, o, "smt");
}
return false;
}
static bool __cpuinit match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
if (per_cpu(cpu_llc_id, cpu1) != BAD_APICID &&
per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2))
return topology_sane(c, o, "llc");
return false;
}
static bool __cpuinit match_mc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
if (c->phys_proc_id == o->phys_proc_id)
return topology_sane(c, o, "mc");
return false;
}
void __cpuinit set_cpu_sibling_map(int cpu)
{
int i;
bool has_mc = boot_cpu_data.x86_max_cores > 1;
bool has_smt = smp_num_siblings > 1;
struct cpuinfo_x86 *c = &cpu_data(cpu);
struct cpuinfo_x86 *o;
int i;
cpumask_set_cpu(cpu, cpu_sibling_setup_mask);
if (smp_num_siblings > 1) {
for_each_cpu(i, cpu_sibling_setup_mask) {
struct cpuinfo_x86 *o = &cpu_data(i);
if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
if (c->phys_proc_id == o->phys_proc_id &&
per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i) &&
c->compute_unit_id == o->compute_unit_id)
link_thread_siblings(cpu, i);
} else if (c->phys_proc_id == o->phys_proc_id &&
c->cpu_core_id == o->cpu_core_id) {
link_thread_siblings(cpu, i);
}
}
} else {
if (!has_smt && !has_mc) {
cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
}
cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
if (__this_cpu_read(cpu_info.x86_max_cores) == 1) {
cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu));
cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
cpumask_set_cpu(cpu, cpu_core_mask(cpu));
c->booted_cores = 1;
return;
}
for_each_cpu(i, cpu_sibling_setup_mask) {
if (per_cpu(cpu_llc_id, cpu) != BAD_APICID &&
per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
cpumask_set_cpu(i, cpu_llc_shared_mask(cpu));
cpumask_set_cpu(cpu, cpu_llc_shared_mask(i));
}
if (c->phys_proc_id == cpu_data(i).phys_proc_id) {
cpumask_set_cpu(i, cpu_core_mask(cpu));
cpumask_set_cpu(cpu, cpu_core_mask(i));
o = &cpu_data(i);
if ((i == cpu) || (has_smt && match_smt(c, o)))
link_mask(sibling, cpu, i);
if ((i == cpu) || (has_mc && match_llc(c, o)))
link_mask(llc_shared, cpu, i);
if ((i == cpu) || (has_mc && match_mc(c, o))) {
link_mask(core, cpu, i);
/*
* Does this new cpu bringup a new core?
*/
@@ -382,8 +413,7 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
* For perf, we return last level cache shared map.
* And for power savings, we return cpu_core_map
*/
if ((sched_mc_power_savings || sched_smt_power_savings) &&
!(cpu_has(c, X86_FEATURE_AMD_DCM)))
if (!(cpu_has(c, X86_FEATURE_AMD_DCM)))
return cpu_core_mask(cpu);
else
return cpu_llc_shared_mask(cpu);