Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler changes from Ingo Molnar:
 "The biggest change is the cleanup/simplification of the load-balancer:
  instead of the current practice of architectures twiddling scheduler
  internal data structures and providing the scheduler domains in
  colorfully inconsistent ways, we now have generic scheduler code in
  kernel/sched/core.c:sched_init_numa() that looks at the architecture's
  node_distance() parameters and (while not fully trusting it) deducts a
  NUMA topology from it.

  This inevitably changes balancing behavior - hopefully for the better.

  There are various smaller optimizations, cleanups and fixlets as well"

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched: Taint kernel with TAINT_WARN after sleep-in-atomic bug
  sched: Remove stale power aware scheduling remnants and dysfunctional knobs
  sched/debug: Fix printing large integers on 32-bit platforms
  sched/fair: Improve the ->group_imb logic
  sched/nohz: Fix rq->cpu_load[] calculations
  sched/numa: Don't scale the imbalance
  sched/fair: Revert sched-domain iteration breakage
  sched/x86: Rewrite set_cpu_sibling_map()
  sched/numa: Fix the new NUMA topology bits
  sched/numa: Rewrite the CONFIG_NUMA sched domain support
  sched/fair: Propagate 'struct lb_env' usage into find_busiest_group
  sched/fair: Add some serialization to the sched_domain load-balance walk
  sched/fair: Let minimally loaded cpu balance the group
  sched: Change rq->nr_running to unsigned int
  x86/numa: Check for nonsensical topologies on real hw as well
  x86/numa: Hard partition cpu topology masks on node boundaries
  x86/numa: Allow specifying node_distance() for numa=fake
  x86/sched: Make mwait_usable() heed to "idle=" kernel parameters properly
  sched: Update documentation and comments
  sched_rt: Avoid unnecessary dequeue and enqueue of pushable tasks in set_cpus_allowed_rt()
This commit is contained in:
Linus Torvalds
2012-05-22 18:27:32 -07:00
25 changed files with 441 additions and 1005 deletions

View File

@@ -36,8 +36,6 @@ extern void cpu_remove_dev_attr(struct device_attribute *attr);
extern int cpu_add_dev_attr_group(struct attribute_group *attrs);
extern void cpu_remove_dev_attr_group(struct attribute_group *attrs);
extern int sched_create_sysfs_power_savings_entries(struct device *dev);
#ifdef CONFIG_HOTPLUG_CPU
extern void unregister_cpu(struct cpu *cpu);
extern ssize_t arch_cpu_probe(const char *, size_t);

View File

@@ -855,61 +855,14 @@ enum cpu_idle_type {
#define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */
#define SD_PREFER_LOCAL 0x0040 /* Prefer to keep tasks local to this domain */
#define SD_SHARE_CPUPOWER 0x0080 /* Domain members share cpu power */
#define SD_POWERSAVINGS_BALANCE 0x0100 /* Balance for power savings */
#define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */
#define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */
#define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */
#define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */
#define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */
enum powersavings_balance_level {
POWERSAVINGS_BALANCE_NONE = 0, /* No power saving load balance */
POWERSAVINGS_BALANCE_BASIC, /* Fill one thread/core/package
* first for long running threads
*/
POWERSAVINGS_BALANCE_WAKEUP, /* Also bias task wakeups to semi-idle
* cpu package for power savings
*/
MAX_POWERSAVINGS_BALANCE_LEVELS
};
extern int sched_mc_power_savings, sched_smt_power_savings;
static inline int sd_balance_for_mc_power(void)
{
if (sched_smt_power_savings)
return SD_POWERSAVINGS_BALANCE;
if (!sched_mc_power_savings)
return SD_PREFER_SIBLING;
return 0;
}
static inline int sd_balance_for_package_power(void)
{
if (sched_mc_power_savings | sched_smt_power_savings)
return SD_POWERSAVINGS_BALANCE;
return SD_PREFER_SIBLING;
}
extern int __weak arch_sd_sibiling_asym_packing(void);
/*
* Optimise SD flags for power savings:
* SD_BALANCE_NEWIDLE helps aggressive task consolidation and power savings.
* Keep default SD flags if sched_{smt,mc}_power_saving=0
*/
static inline int sd_power_saving_flags(void)
{
if (sched_mc_power_savings | sched_smt_power_savings)
return SD_BALANCE_NEWIDLE;
return 0;
}
struct sched_group_power {
atomic_t ref;
/*
@@ -1962,7 +1915,7 @@ static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
*/
extern unsigned long long notrace sched_clock(void);
/*
* See the comment in kernel/sched_clock.c
* See the comment in kernel/sched/clock.c
*/
extern u64 cpu_clock(int cpu);
extern u64 local_clock(void);

View File

@@ -70,7 +70,6 @@ int arch_update_cpu_topology(void);
* Below are the 3 major initializers used in building sched_domains:
* SD_SIBLING_INIT, for SMT domains
* SD_CPU_INIT, for SMP domains
* SD_NODE_INIT, for NUMA domains
*
* Any architecture that cares to do any tuning to these values should do so
* by defining their own arch-specific initializer in include/asm/topology.h.
@@ -99,7 +98,6 @@ int arch_update_cpu_topology(void);
| 0*SD_BALANCE_WAKE \
| 1*SD_WAKE_AFFINE \
| 1*SD_SHARE_CPUPOWER \
| 0*SD_POWERSAVINGS_BALANCE \
| 1*SD_SHARE_PKG_RESOURCES \
| 0*SD_SERIALIZE \
| 0*SD_PREFER_SIBLING \
@@ -135,8 +133,6 @@ int arch_update_cpu_topology(void);
| 0*SD_SHARE_CPUPOWER \
| 1*SD_SHARE_PKG_RESOURCES \
| 0*SD_SERIALIZE \
| sd_balance_for_mc_power() \
| sd_power_saving_flags() \
, \
.last_balance = jiffies, \
.balance_interval = 1, \
@@ -168,56 +164,18 @@ int arch_update_cpu_topology(void);
| 0*SD_SHARE_CPUPOWER \
| 0*SD_SHARE_PKG_RESOURCES \
| 0*SD_SERIALIZE \
| sd_balance_for_package_power() \
| sd_power_saving_flags() \
, \
.last_balance = jiffies, \
.balance_interval = 1, \
}
#endif
/* sched_domains SD_ALLNODES_INIT for NUMA machines */
#define SD_ALLNODES_INIT (struct sched_domain) { \
.min_interval = 64, \
.max_interval = 64*num_online_cpus(), \
.busy_factor = 128, \
.imbalance_pct = 133, \
.cache_nice_tries = 1, \
.busy_idx = 3, \
.idle_idx = 3, \
.flags = 1*SD_LOAD_BALANCE \
| 1*SD_BALANCE_NEWIDLE \
| 0*SD_BALANCE_EXEC \
| 0*SD_BALANCE_FORK \
| 0*SD_BALANCE_WAKE \
| 0*SD_WAKE_AFFINE \
| 0*SD_SHARE_CPUPOWER \
| 0*SD_POWERSAVINGS_BALANCE \
| 0*SD_SHARE_PKG_RESOURCES \
| 1*SD_SERIALIZE \
| 0*SD_PREFER_SIBLING \
, \
.last_balance = jiffies, \
.balance_interval = 64, \
}
#ifndef SD_NODES_PER_DOMAIN
#define SD_NODES_PER_DOMAIN 16
#endif
#ifdef CONFIG_SCHED_BOOK
#ifndef SD_BOOK_INIT
#error Please define an appropriate SD_BOOK_INIT in include/asm/topology.h!!!
#endif
#endif /* CONFIG_SCHED_BOOK */
#ifdef CONFIG_NUMA
#ifndef SD_NODE_INIT
#error Please define an appropriate SD_NODE_INIT in include/asm/topology.h!!!
#endif
#endif /* CONFIG_NUMA */
#ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
DECLARE_PER_CPU(int, numa_node);