Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar:
 "The main scheduler changes in this cycle were:

   - support Intel Turbo Boost Max Technology 3.0 (TBM3) by introducing a
     notion of 'better cores', which the scheduler will prefer to
     schedule single threaded workloads on. (Tim Chen, Srinivas
     Pandruvada)

   - enhance the handling of asymmetric capacity CPUs further (Morten
     Rasmussen)

   - improve/fix load handling when moving tasks between task groups
     (Vincent Guittot)

   - simplify and clean up the cputime code (Stanislaw Gruszka)

   - improve mass fork()ed task spread a.k.a. hackbench speedup (Vincent
     Guittot)

   - make struct kthread kmalloc()ed and related fixes (Oleg Nesterov)

   - add uaccess atomicity debugging (when using access_ok() in the
     wrong context), under CONFIG_DEBUG_ATOMIC_SLEEP=y (Peter Zijlstra)

   - implement various fixes, cleanups and other enhancements (Daniel
     Bristot de Oliveira, Martin Schwidefsky, Rafael J. Wysocki)"

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (41 commits)
  sched/core: Use load_avg for selecting idlest group
  sched/core: Fix find_idlest_group() for fork
  kthread: Don't abuse kthread_create_on_cpu() in __kthread_create_worker()
  kthread: Don't use to_live_kthread() in kthread_[un]park()
  kthread: Don't use to_live_kthread() in kthread_stop()
  Revert "kthread: Pin the stack via try_get_task_stack()/put_task_stack() in to_live_kthread() function"
  kthread: Make struct kthread kmalloc'ed
  x86/uaccess, sched/preempt: Verify access_ok() context
  sched/x86: Make CONFIG_SCHED_MC_PRIO=y easier to enable
  sched/x86: Change CONFIG_SCHED_ITMT to CONFIG_SCHED_MC_PRIO
  x86/sched: Use #include <linux/mutex.h> instead of #include <asm/mutex.h>
  cpufreq/intel_pstate: Use CPPC to get max performance
  acpi/bus: Set _OSC for diverse core support
  acpi/bus: Enable HWP CPPC objects
  x86/sched: Add SD_ASYM_PACKING flags to x86 ITMT CPU
  x86/sysctl: Add sysctl for ITMT scheduling feature
  x86: Enable Intel Turbo Boost Max Technology 3.0
  x86/topology: Define x86's arch_update_cpu_topology
  sched: Extend scheduler's asym packing
  sched/fair: Clean up the tunable parameter definitions
  ...
Bu işleme şunda yer alıyor:
Linus Torvalds
2016-12-12 12:15:10 -08:00
işleme 92c020d08d
36 değiştirilmiş dosya ile 1155 ekleme ve 409 silme

Dosyayı Görüntüle

@@ -939,6 +939,27 @@ config SCHED_MC
making when dealing with multi-core CPU chips at a cost of slightly
increased overhead in some places. If unsure say N here.
config SCHED_MC_PRIO
bool "CPU core priorities scheduler support"
depends on SCHED_MC && CPU_SUP_INTEL
select X86_INTEL_PSTATE
select CPU_FREQ
default y
---help---
Intel Turbo Boost Max Technology 3.0 enabled CPUs have a
core ordering determined at manufacturing time, which allows
certain cores to reach higher turbo frequencies (when running
single threaded workloads) than others.
Enabling this kernel feature teaches the scheduler about
the TBM3 (aka ITMT) priority order of the CPU cores and adjusts the
scheduler's CPU selection logic accordingly, so that higher
overall system performance can be achieved.
This feature will have no effect on CPUs without this feature.
If unsure say Y here.
source "kernel/Kconfig.preempt"
config UP_LATE_INIT

Dosyayı Görüntüle

@@ -24,7 +24,13 @@ static __always_inline int preempt_count(void)
static __always_inline void preempt_count_set(int pc)
{
raw_cpu_write_4(__preempt_count, pc);
int old, new;
do {
old = raw_cpu_read_4(__preempt_count);
new = (old & PREEMPT_NEED_RESCHED) |
(pc & ~PREEMPT_NEED_RESCHED);
} while (raw_cpu_cmpxchg_4(__preempt_count, old, new) != old);
}
/*

Dosyayı Görüntüle

@@ -146,4 +146,36 @@ struct pci_bus;
int x86_pci_root_bus_node(int bus);
void x86_pci_root_bus_resources(int bus, struct list_head *resources);
extern bool x86_topology_update;
/*
 * ITMT (Intel Turbo Boost Max Technology 3.0) scheduler interface.
 * Real declarations are provided when CONFIG_SCHED_MC_PRIO is set;
 * otherwise no-op stubs and a constant-zero sysctl value are used.
 */
#ifdef CONFIG_SCHED_MC_PRIO
#include <asm/percpu.h>
/* Per-CPU scheduling priority of each CPU */
DECLARE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
/* Non-zero when ITMT-aware scheduling is enabled */
extern unsigned int __read_mostly sysctl_sched_itmt_enabled;
/* Interface to set priority of a cpu */
void sched_set_itmt_core_prio(int prio, int core_cpu);
/* Interface to notify scheduler that system supports ITMT */
int sched_set_itmt_support(void);
/* Interface to notify scheduler that system revokes ITMT support */
void sched_clear_itmt_support(void);
#else /* CONFIG_SCHED_MC_PRIO */
/* Without CONFIG_SCHED_MC_PRIO the feature always reads as disabled */
#define sysctl_sched_itmt_enabled 0
/* Stub: does nothing when CONFIG_SCHED_MC_PRIO is not set */
static inline void sched_set_itmt_core_prio(int prio, int core_cpu)
{
}
/* Stub: always returns 0 when CONFIG_SCHED_MC_PRIO is not set */
static inline int sched_set_itmt_support(void)
{
return 0;
}
/* Stub: does nothing when CONFIG_SCHED_MC_PRIO is not set */
static inline void sched_clear_itmt_support(void)
{
}
#endif /* CONFIG_SCHED_MC_PRIO */
#endif /* _ASM_X86_TOPOLOGY_H */

Dosyayı Görüntüle

@@ -68,6 +68,12 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
__chk_range_not_ok((unsigned long __force)(addr), size, limit); \
})
/*
 * WARN_ON_IN_IRQ() - debug check for the calling context.
 *
 * With CONFIG_DEBUG_ATOMIC_SLEEP set, this warns once if in_task() is
 * false. Without it, the macro expands to nothing.
 */
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
# define WARN_ON_IN_IRQ() WARN_ON_ONCE(!in_task())
#else
# define WARN_ON_IN_IRQ()
#endif
/**
* access_ok: - Checks if a user space pointer is valid
* @type: Type of access: %VERIFY_READ or %VERIFY_WRITE. Note that
@@ -88,8 +94,11 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
* checks that the pointer is in the user space range - after calling
* this function, memory access functions may still return -EFAULT.
*/
#define access_ok(type, addr, size) \
likely(!__range_not_ok(addr, size, user_addr_max()))
#define access_ok(type, addr, size) \
({ \
WARN_ON_IN_IRQ(); \
likely(!__range_not_ok(addr, size, user_addr_max())); \
})
/*
* These are the main single-value transfer routines. They automatically

Dosyayı Görüntüle

@@ -123,6 +123,7 @@ obj-$(CONFIG_EFI) += sysfb_efi.o
obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
obj-$(CONFIG_TRACING) += tracepoint.o
obj-$(CONFIG_SCHED_MC_PRIO) += itmt.o
ifdef CONFIG_FRAME_POINTER
obj-y += unwind_frame.o

Dosyayı Görüntüle

@@ -906,14 +906,14 @@ static int apm_cpu_idle(struct cpuidle_device *dev,
static int use_apm_idle; /* = 0 */
static unsigned int last_jiffies; /* = 0 */
static unsigned int last_stime; /* = 0 */
cputime_t stime;
cputime_t stime, utime;
int apm_idle_done = 0;
unsigned int jiffies_since_last_check = jiffies - last_jiffies;
unsigned int bucket;
recalc:
task_cputime(current, NULL, &stime);
task_cputime(current, &utime, &stime);
if (jiffies_since_last_check > IDLE_CALC_LIMIT) {
use_apm_idle = 0;
} else if (jiffies_since_last_check > idle_period) {

215
arch/x86/kernel/itmt.c Normal dosya
Dosyayı Görüntüle

@@ -0,0 +1,215 @@
/*
* itmt.c: Support Intel Turbo Boost Max Technology 3.0
*
* (C) Copyright 2016 Intel Corporation
* Author: Tim Chen <tim.c.chen@linux.intel.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; version 2
* of the License.
*
* On platforms supporting Intel Turbo Boost Max Technology 3.0, (ITMT),
* the maximum turbo frequencies of some cores in a CPU package may be
* higher than for the other cores in the same package. In that case,
* better performance can be achieved by making the scheduler prefer
* to run tasks on the CPUs with higher max turbo frequencies.
*
* This file provides functions and data structures for enabling the
* scheduler to favor scheduling on cores that can be boosted to a higher
* frequency under ITMT.
*/
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <linux/cpuset.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/sysctl.h>
#include <linux/nodemask.h>
/*
 * Held by the sysctl handler and by sched_set_itmt_support() /
 * sched_clear_itmt_support() while they read or update
 * sched_itmt_capable and sysctl_sched_itmt_enabled.
 */
static DEFINE_MUTEX(itmt_update_mutex);
/*
 * Per-CPU priority: written by sched_set_itmt_core_prio() and returned
 * by arch_asym_cpu_priority().
 */
DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
/* Boolean to track if system has ITMT capabilities */
static bool __read_mostly sched_itmt_capable;
/*
* Boolean to control whether we want to move processes to cpu capable
* of higher turbo frequency for cpus supporting Intel Turbo Boost Max
* Technology 3.0.
*
* It can be set via /proc/sys/kernel/sched_itmt_enabled
*/
unsigned int __read_mostly sysctl_sched_itmt_enabled;
/*
 * proc handler for the "sched_itmt_enabled" sysctl.
 *
 * Rejects the access with -EINVAL while the system has not been marked
 * ITMT capable. Otherwise the value is processed by
 * proc_dointvec_minmax(); if a successful write changed it, a topology
 * update is flagged and the sched domains are rebuilt.
 *
 * Returns 0 on success or a negative error code.
 */
static int sched_itmt_update_handler(struct ctl_table *table, int write,
				     void __user *buffer, size_t *lenp,
				     loff_t *ppos)
{
	unsigned int prev_enabled;
	int err;

	mutex_lock(&itmt_update_mutex);

	if (sched_itmt_capable) {
		prev_enabled = sysctl_sched_itmt_enabled;
		err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);

		/* Only a successful write that flipped the value matters. */
		if (err == 0 && write &&
		    sysctl_sched_itmt_enabled != prev_enabled) {
			x86_topology_update = true;
			rebuild_sched_domains();
		}
	} else {
		err = -EINVAL;
	}

	mutex_unlock(&itmt_update_mutex);

	return err;
}
/* Lower and upper bounds passed to the handler via extra1/extra2 */
static unsigned int zero;
static unsigned int one = 1;
/*
 * sysctl entry "sched_itmt_enabled": backed by
 * sysctl_sched_itmt_enabled, mode 0644, handled by
 * sched_itmt_update_handler() with bounds zero..one.
 */
static struct ctl_table itmt_kern_table[] = {
{
.procname = "sched_itmt_enabled",
.data = &sysctl_sched_itmt_enabled,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = sched_itmt_update_handler,
.extra1 = &zero,
.extra2 = &one,
},
/* empty terminating entry */
{}
};
/*
 * Root table: a "kernel" directory (mode 0555) whose child is
 * itmt_kern_table. This is the table handed to register_sysctl_table().
 */
static struct ctl_table itmt_root_table[] = {
{
.procname = "kernel",
.mode = 0555,
.child = itmt_kern_table,
},
/* empty terminating entry */
{}
};
/*
 * Header returned by register_sysctl_table() in sched_set_itmt_support();
 * unregistered and reset to NULL in sched_clear_itmt_support().
 */
static struct ctl_table_header *itmt_sysctl_header;
/**
 * sched_set_itmt_support() - Indicate platform supports ITMT
 *
 * This function is used by the OS to indicate to the scheduler that the
 * platform is capable of supporting the ITMT feature.
 *
 * In the current scheme the pstate driver detects whether the system
 * is ITMT capable and calls sched_set_itmt_support().
 *
 * This must be done only after sched_set_itmt_core_prio() has been
 * called to set the CPUs' priorities.
 *
 * It must not be called with the CPU hotplug lock held, as that lock
 * needs to be acquired later to rebuild the sched domains.
 *
 * If the system is already marked ITMT capable the call is a no-op.
 * Otherwise the sysctl table is registered, ITMT scheduling is enabled
 * and the sched domains are rebuilt.
 *
 * Return: 0 on success (including the already-capable case), -ENOMEM if
 * the sysctl table could not be registered.
 */
int sched_set_itmt_support(void)
{
	int ret = 0;

	mutex_lock(&itmt_update_mutex);

	if (sched_itmt_capable)
		goto out_unlock;

	itmt_sysctl_header = register_sysctl_table(itmt_root_table);
	if (!itmt_sysctl_header) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	sched_itmt_capable = true;

	/*
	 * ITMT scheduling is switched on as soon as the platform reports
	 * support. The value was just set under the mutex, so there is
	 * nothing to test before requesting the rebuild.
	 */
	sysctl_sched_itmt_enabled = 1;
	x86_topology_update = true;
	rebuild_sched_domains();

out_unlock:
	mutex_unlock(&itmt_update_mutex);

	return ret;
}
/**
 * sched_clear_itmt_support() - Revoke platform's support of ITMT
 *
 * This function is used by the OS to indicate that it has revoked the
 * platform's support of the ITMT feature. It does nothing if the system
 * is not currently marked ITMT capable.
 *
 * It must not be called with the CPU hotplug lock held, as that lock
 * needs to be acquired later to rebuild the sched domains.
 */
void sched_clear_itmt_support(void)
{
	mutex_lock(&itmt_update_mutex);

	if (sched_itmt_capable) {
		sched_itmt_capable = false;

		/* Drop the sysctl entry registered when support was set. */
		if (itmt_sysctl_header) {
			unregister_sysctl_table(itmt_sysctl_header);
			itmt_sysctl_header = NULL;
		}

		/* Disable sched_itmt now that we are no longer ITMT capable. */
		if (sysctl_sched_itmt_enabled) {
			sysctl_sched_itmt_enabled = 0;
			x86_topology_update = true;
			rebuild_sched_domains();
		}
	}

	mutex_unlock(&itmt_update_mutex);
}
/*
 * Return the priority stored for @cpu in the per-CPU
 * sched_core_priority variable.
 */
int arch_asym_cpu_priority(int cpu)
{
	int prio = per_cpu(sched_core_priority, cpu);

	return prio;
}
/**
 * sched_set_itmt_core_prio() - Set CPU priority based on ITMT
 * @prio: Priority of cpu core
 * @core_cpu: The cpu number associated with the core
 *
 * The pstate driver will find out the max boost frequency
 * and call this function to set a priority proportional
 * to the max boost frequency. A CPU with a higher boost
 * frequency will receive a higher priority.
 *
 * Every CPU in the sibling mask of @core_cpu is assigned
 * prio * smp_num_siblings divided by its 1-based position in
 * the mask, so later siblings get progressively lower values.
 *
 * No need to rebuild the sched domains after updating
 * the CPU priorities. The sched domains have no
 * dependency on CPU priorities.
 */
void sched_set_itmt_core_prio(int prio, int core_cpu)
{
	int sibling;
	int rank = 0;

	for_each_cpu(sibling, topology_sibling_cpumask(core_cpu)) {
		/*
		 * Ensure that the siblings are moved to the end
		 * of the priority chain and only used when
		 * all other high priority cpus are out of capacity.
		 */
		rank++;
		per_cpu(sched_core_priority, sibling) =
			prio * smp_num_siblings / rank;
	}
}

Dosyayı Görüntüle

@@ -109,6 +109,17 @@ static bool logical_packages_frozen __read_mostly;
/* Maximum number of SMT threads on any online core */
int __max_smt_threads __read_mostly;
/* Flag to indicate if a complete sched domain rebuild is required */
bool x86_topology_update;
/*
 * Report whether a topology update was flagged and clear the flag.
 *
 * Returns the value x86_topology_update held on entry (non-zero if an
 * update was pending); the flag is reset to false before returning.
 */
int arch_update_cpu_topology(void)
{
	int pending = x86_topology_update;

	x86_topology_update = false;

	return pending;
}
static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
{
unsigned long flags;
@@ -471,22 +482,42 @@ static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
return false;
}
#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_MC)
/*
 * Extra sched domain flags for ITMT: SD_ASYM_PACKING while
 * sysctl_sched_itmt_enabled is non-zero, otherwise no flags.
 */
static inline int x86_sched_itmt_flags(void)
{
	if (sysctl_sched_itmt_enabled)
		return SD_ASYM_PACKING;

	return 0;
}
#ifdef CONFIG_SCHED_MC
/* cpu_core_flags() combined with the ITMT flags. */
static int x86_core_flags(void)
{
	int flags = cpu_core_flags();

	flags |= x86_sched_itmt_flags();

	return flags;
}
#endif
#ifdef CONFIG_SCHED_SMT
/* cpu_smt_flags() combined with the ITMT flags. */
static int x86_smt_flags(void)
{
	int flags = cpu_smt_flags();

	flags |= x86_sched_itmt_flags();

	return flags;
}
#endif
#endif
static struct sched_domain_topology_level x86_numa_in_package_topology[] = {
#ifdef CONFIG_SCHED_SMT
{ cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
{ cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) },
#endif
#ifdef CONFIG_SCHED_MC
{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
{ cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) },
#endif
{ NULL, },
};
static struct sched_domain_topology_level x86_topology[] = {
#ifdef CONFIG_SCHED_SMT
{ cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
{ cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) },
#endif
#ifdef CONFIG_SCHED_MC
{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
{ cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) },
#endif
{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
{ NULL, },