Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
 "The main scheduler changes in this cycle were:

   - support Intel Turbo Boost Max Technology 3.0 (TBM3) by introducing
     a notion of 'better cores', which the scheduler will prefer to
     schedule single threaded workloads on. (Tim Chen, Srinivas Pandruvada)

   - enhance the handling of asymmetric capacity CPUs further (Morten Rasmussen)

   - improve/fix load handling when moving tasks between task groups (Vincent Guittot)

   - simplify and clean up the cputime code (Stanislaw Gruszka)

   - improve mass fork()ed task spread a.k.a. hackbench speedup (Vincent Guittot)

   - make struct kthread kmalloc()ed and related fixes (Oleg Nesterov)

   - add uaccess atomicity debugging (when using access_ok() in the wrong
     context), under CONFIG_DEBUG_ATOMIC_SLEEP=y (Peter Zijlstra)

   - implement various fixes, cleanups and other enhancements (Daniel
     Bristot de Oliveira, Martin Schwidefsky, Rafael J. Wysocki)"

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (41 commits)
  sched/core: Use load_avg for selecting idlest group
  sched/core: Fix find_idlest_group() for fork
  kthread: Don't abuse kthread_create_on_cpu() in __kthread_create_worker()
  kthread: Don't use to_live_kthread() in kthread_[un]park()
  kthread: Don't use to_live_kthread() in kthread_stop()
  Revert "kthread: Pin the stack via try_get_task_stack()/put_task_stack() in to_live_kthread() function"
  kthread: Make struct kthread kmalloc'ed
  x86/uaccess, sched/preempt: Verify access_ok() context
  sched/x86: Make CONFIG_SCHED_MC_PRIO=y easier to enable
  sched/x86: Change CONFIG_SCHED_ITMT to CONFIG_SCHED_MC_PRIO
  x86/sched: Use #include <linux/mutex.h> instead of #include <asm/mutex.h>
  cpufreq/intel_pstate: Use CPPC to get max performance
  acpi/bus: Set _OSC for diverse core support
  acpi/bus: Enable HWP CPPC objects
  x86/sched: Add SD_ASYM_PACKING flags to x86 ITMT CPU
  x86/sysctl: Add sysctl for ITMT scheduling feature
  x86: Enable Intel Turbo Boost Max Technology 3.0
  x86/topology: Define x86's arch_update_cpu_topology
  sched: Extend scheduler's asym packing
  sched/fair: Clean up the tunable parameter definitions
  ...
@@ -939,6 +939,27 @@ config SCHED_MC
 	  making when dealing with multi-core CPU chips at a cost of slightly
 	  increased overhead in some places. If unsure say N here.
 
+config SCHED_MC_PRIO
+	bool "CPU core priorities scheduler support"
+	depends on SCHED_MC && CPU_SUP_INTEL
+	select X86_INTEL_PSTATE
+	select CPU_FREQ
+	default y
+	---help---
+	  Intel Turbo Boost Max Technology 3.0 enabled CPUs have a
+	  core ordering determined at manufacturing time, which allows
+	  certain cores to reach higher turbo frequencies (when running
+	  single threaded workloads) than others.
+
+	  Enabling this kernel feature teaches the scheduler about
+	  the TBM3 (aka ITMT) priority order of the CPU cores and adjusts the
+	  scheduler's CPU selection logic accordingly, so that higher
+	  overall system performance can be achieved.
+
+	  This feature will have no effect on CPUs without this feature.
+
+	  If unsure say Y here.
+
 source "kernel/Kconfig.preempt"
 
 config UP_LATE_INIT
@@ -24,7 +24,13 @@ static __always_inline int preempt_count(void)
 
 static __always_inline void preempt_count_set(int pc)
 {
-	raw_cpu_write_4(__preempt_count, pc);
+	int old, new;
+
+	do {
+		old = raw_cpu_read_4(__preempt_count);
+		new = (old & PREEMPT_NEED_RESCHED) |
+			(pc & ~PREEMPT_NEED_RESCHED);
+	} while (raw_cpu_cmpxchg_4(__preempt_count, old, new) != old);
 }
 
 /*
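The point of the new loop is that a plain write could clobber the PREEMPT_NEED_RESCHED bit (bit 31 on x86, value 0x80000000) that another context may set concurrently. A minimal user-space sketch of the masking logic, with a plain variable and a GCC builtin standing in for the percpu read/cmpxchg primitives:

/* Toy model: 'count' stands in for the percpu __preempt_count and
 * __sync_bool_compare_and_swap() for raw_cpu_cmpxchg_4(). */
#include <assert.h>
#include <stdio.h>

#define PREEMPT_NEED_RESCHED	0x80000000u	/* bit 31, as on x86 */

static unsigned int count = PREEMPT_NEED_RESCHED | 2;

static void toy_preempt_count_set(unsigned int pc)
{
	unsigned int old, new;

	do {
		old = count;
		new = (old & PREEMPT_NEED_RESCHED) | (pc & ~PREEMPT_NEED_RESCHED);
	} while (!__sync_bool_compare_and_swap(&count, old, new));
}

int main(void)
{
	toy_preempt_count_set(5);
	assert(count == (PREEMPT_NEED_RESCHED | 5));	/* resched bit survived */
	printf("count = %#x\n", count);
	return 0;
}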
@@ -146,4 +146,36 @@ struct pci_bus;
 int x86_pci_root_bus_node(int bus);
 void x86_pci_root_bus_resources(int bus, struct list_head *resources);
 
+extern bool x86_topology_update;
+
+#ifdef CONFIG_SCHED_MC_PRIO
+#include <asm/percpu.h>
+
+DECLARE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
+extern unsigned int __read_mostly sysctl_sched_itmt_enabled;
+
+/* Interface to set priority of a cpu */
+void sched_set_itmt_core_prio(int prio, int core_cpu);
+
+/* Interface to notify scheduler that system supports ITMT */
+int sched_set_itmt_support(void);
+
+/* Interface to notify scheduler that system revokes ITMT support */
+void sched_clear_itmt_support(void);
+
+#else /* CONFIG_SCHED_MC_PRIO */
+
+#define sysctl_sched_itmt_enabled	0
+static inline void sched_set_itmt_core_prio(int prio, int core_cpu)
+{
+}
+static inline int sched_set_itmt_support(void)
+{
+	return 0;
+}
+static inline void sched_clear_itmt_support(void)
+{
+}
+#endif /* CONFIG_SCHED_MC_PRIO */
+
 #endif /* _ASM_X86_TOPOLOGY_H */
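The comment blocks in itmt.c further below spell out the intended call order for this interface: per-CPU priorities must be published via sched_set_itmt_core_prio() before sched_set_itmt_support() is called. A hedged sketch of a driver-side sequence under that contract — read_core_perf() is an invented stand-in for however the driver (e.g. intel_pstate via CPPC) obtains a core's maximum performance:

#include <linux/kernel.h>
#include <linux/cpumask.h>
#include <asm/topology.h>

/* Invented helper: pretend a higher return value means a faster core. */
static int read_core_perf(int cpu)
{
	return 1024 - cpu;
}

static void demo_enable_itmt(void)
{
	int cpu;

	/* 1. Publish per-core priorities first. */
	for_each_online_cpu(cpu)
		sched_set_itmt_core_prio(read_core_perf(cpu), cpu);

	/*
	 * 2. Only then announce support; this registers the sysctl and
	 *    triggers a sched domain rebuild (so the cpu hotplug lock
	 *    must not be held here).
	 */
	if (sched_set_itmt_support())
		pr_warn("ITMT: could not enable support\n");
}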
@@ -68,6 +68,12 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
 	__chk_range_not_ok((unsigned long __force)(addr), size, limit); \
 })
 
+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+# define WARN_ON_IN_IRQ()	WARN_ON_ONCE(!in_task())
+#else
+# define WARN_ON_IN_IRQ()
+#endif
+
 /**
  * access_ok: - Checks if a user space pointer is valid
  * @type: Type of access: %VERIFY_READ or %VERIFY_WRITE. Note that
@@ -88,8 +94,11 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
  * checks that the pointer is in the user space range - after calling
  * this function, memory access functions may still return -EFAULT.
  */
-#define access_ok(type, addr, size) \
-	likely(!__range_not_ok(addr, size, user_addr_max()))
+#define access_ok(type, addr, size)					\
+({									\
+	WARN_ON_IN_IRQ();						\
+	likely(!__range_not_ok(addr, size, user_addr_max()));		\
+})
 
 /*
  * These are the main single-value transfer routines. They automatically
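The practical effect: access_ok() calls from hardirq or other non-task context now warn once under CONFIG_DEBUG_ATOMIC_SLEEP=y instead of silently passing. A hedged illustration of the misuse pattern being targeted (demo code, not from the tree):

#include <linux/interrupt.h>
#include <linux/types.h>
#include <linux/uaccess.h>

static irqreturn_t demo_handler(int irq, void *dev_id)
{
	/* Pretend a user pointer was stashed in dev_id earlier. */
	void __user *ubuf = (void __user *)dev_id;

	/*
	 * in_task() is false here, so this access_ok() now fires
	 * WARN_ON_ONCE() - user pointers must be checked (and copied)
	 * from process context, not from an interrupt handler.
	 */
	if (!access_ok(VERIFY_READ, ubuf, sizeof(u32)))
		return IRQ_NONE;

	return IRQ_HANDLED;
}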
@@ -123,6 +123,7 @@ obj-$(CONFIG_EFI)			+= sysfb_efi.o
 
 obj-$(CONFIG_PERF_EVENTS)		+= perf_regs.o
 obj-$(CONFIG_TRACING)			+= tracepoint.o
+obj-$(CONFIG_SCHED_MC_PRIO)		+= itmt.o
 
 ifdef CONFIG_FRAME_POINTER
 obj-y					+= unwind_frame.o
@@ -906,14 +906,14 @@ static int apm_cpu_idle(struct cpuidle_device *dev,
|
||||
static int use_apm_idle; /* = 0 */
|
||||
static unsigned int last_jiffies; /* = 0 */
|
||||
static unsigned int last_stime; /* = 0 */
|
||||
cputime_t stime;
|
||||
cputime_t stime, utime;
|
||||
|
||||
int apm_idle_done = 0;
|
||||
unsigned int jiffies_since_last_check = jiffies - last_jiffies;
|
||||
unsigned int bucket;
|
||||
|
||||
recalc:
|
||||
task_cputime(current, NULL, &stime);
|
||||
task_cputime(current, &utime, &stime);
|
||||
if (jiffies_since_last_check > IDLE_CALC_LIMIT) {
|
||||
use_apm_idle = 0;
|
||||
} else if (jiffies_since_last_check > idle_period) {
|
||||
|
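The NULL argument goes away because this cycle's cputime simplifications turn task_cputime() into a helper that writes both output parameters unconditionally — roughly the following shape (a sketch of the simplified helper for illustration, not quoted from the tree):

static inline void task_cputime(struct task_struct *t,
				cputime_t *utime, cputime_t *stime)
{
	*utime = t->utime;	/* no "if (utime)" guard any more */
	*stime = t->stime;
}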
arch/x86/kernel/itmt.c (new file, 215 lines)
@@ -0,0 +1,215 @@
+/*
+ * itmt.c: Support Intel Turbo Boost Max Technology 3.0
+ *
+ * (C) Copyright 2016 Intel Corporation
+ * Author: Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ *
+ * On platforms supporting Intel Turbo Boost Max Technology 3.0, (ITMT),
+ * the maximum turbo frequencies of some cores in a CPU package may be
+ * higher than for the other cores in the same package. In that case,
+ * better performance can be achieved by making the scheduler prefer
+ * to run tasks on the CPUs with higher max turbo frequencies.
+ *
+ * This file provides functions and data structures for enabling the
+ * scheduler to favor scheduling on cores can be boosted to a higher
+ * frequency under ITMT.
+ */
+
+#include <linux/sched.h>
+#include <linux/cpumask.h>
+#include <linux/cpuset.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/sysctl.h>
+#include <linux/nodemask.h>
+
+static DEFINE_MUTEX(itmt_update_mutex);
+DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
+
+/* Boolean to track if system has ITMT capabilities */
+static bool __read_mostly sched_itmt_capable;
+
+/*
+ * Boolean to control whether we want to move processes to cpu capable
+ * of higher turbo frequency for cpus supporting Intel Turbo Boost Max
+ * Technology 3.0.
+ *
+ * It can be set via /proc/sys/kernel/sched_itmt_enabled
+ */
+unsigned int __read_mostly sysctl_sched_itmt_enabled;
+
+static int sched_itmt_update_handler(struct ctl_table *table, int write,
+				     void __user *buffer, size_t *lenp,
+				     loff_t *ppos)
+{
+	unsigned int old_sysctl;
+	int ret;
+
+	mutex_lock(&itmt_update_mutex);
+
+	if (!sched_itmt_capable) {
+		mutex_unlock(&itmt_update_mutex);
+		return -EINVAL;
+	}
+
+	old_sysctl = sysctl_sched_itmt_enabled;
+	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+
+	if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) {
+		x86_topology_update = true;
+		rebuild_sched_domains();
+	}
+
+	mutex_unlock(&itmt_update_mutex);
+
+	return ret;
+}
+
+static unsigned int zero;
+static unsigned int one = 1;
+static struct ctl_table itmt_kern_table[] = {
+	{
+		.procname	= "sched_itmt_enabled",
+		.data		= &sysctl_sched_itmt_enabled,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= sched_itmt_update_handler,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
+	{}
+};
+
+static struct ctl_table itmt_root_table[] = {
+	{
+		.procname	= "kernel",
+		.mode		= 0555,
+		.child		= itmt_kern_table,
+	},
+	{}
+};
+
+static struct ctl_table_header *itmt_sysctl_header;
+
+/**
+ * sched_set_itmt_support() - Indicate platform supports ITMT
+ *
+ * This function is used by the OS to indicate to scheduler that the platform
+ * is capable of supporting the ITMT feature.
+ *
+ * The current scheme has the pstate driver detects if the system
+ * is ITMT capable and call sched_set_itmt_support.
+ *
+ * This must be done only after sched_set_itmt_core_prio
+ * has been called to set the cpus' priorities.
+ * It must not be called with cpu hot plug lock
+ * held as we need to acquire the lock to rebuild sched domains
+ * later.
+ *
+ * Return: 0 on success
+ */
+int sched_set_itmt_support(void)
+{
+	mutex_lock(&itmt_update_mutex);
+
+	if (sched_itmt_capable) {
+		mutex_unlock(&itmt_update_mutex);
+		return 0;
+	}
+
+	itmt_sysctl_header = register_sysctl_table(itmt_root_table);
+	if (!itmt_sysctl_header) {
+		mutex_unlock(&itmt_update_mutex);
+		return -ENOMEM;
+	}
+
+	sched_itmt_capable = true;
+
+	sysctl_sched_itmt_enabled = 1;
+
+	if (sysctl_sched_itmt_enabled) {
+		x86_topology_update = true;
+		rebuild_sched_domains();
+	}
+
+	mutex_unlock(&itmt_update_mutex);
+
+	return 0;
+}
+
+/**
+ * sched_clear_itmt_support() - Revoke platform's support of ITMT
+ *
+ * This function is used by the OS to indicate that it has
+ * revoked the platform's support of ITMT feature.
+ *
+ * It must not be called with cpu hot plug lock
+ * held as we need to acquire the lock to rebuild sched domains
+ * later.
+ */
+void sched_clear_itmt_support(void)
+{
+	mutex_lock(&itmt_update_mutex);
+
+	if (!sched_itmt_capable) {
+		mutex_unlock(&itmt_update_mutex);
+		return;
+	}
+	sched_itmt_capable = false;
+
+	if (itmt_sysctl_header) {
+		unregister_sysctl_table(itmt_sysctl_header);
+		itmt_sysctl_header = NULL;
+	}
+
+	if (sysctl_sched_itmt_enabled) {
+		/* disable sched_itmt if we are no longer ITMT capable */
+		sysctl_sched_itmt_enabled = 0;
+		x86_topology_update = true;
+		rebuild_sched_domains();
+	}
+
+	mutex_unlock(&itmt_update_mutex);
+}
+
+int arch_asym_cpu_priority(int cpu)
+{
+	return per_cpu(sched_core_priority, cpu);
+}
+
+/**
+ * sched_set_itmt_core_prio() - Set CPU priority based on ITMT
+ * @prio:	Priority of cpu core
+ * @core_cpu:	The cpu number associated with the core
+ *
+ * The pstate driver will find out the max boost frequency
+ * and call this function to set a priority proportional
+ * to the max boost frequency. CPU with higher boost
+ * frequency will receive higher priority.
+ *
+ * No need to rebuild sched domain after updating
+ * the CPU priorities. The sched domains have no
+ * dependency on CPU priorities.
+ */
+void sched_set_itmt_core_prio(int prio, int core_cpu)
+{
+	int cpu, i = 1;
+
+	for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) {
+		int smt_prio;
+
+		/*
+		 * Ensure that the siblings are moved to the end
+		 * of the priority chain and only used when
+		 * all other high priority cpus are out of capacity.
+		 */
+		smt_prio = prio * smp_num_siblings / i;
+		per_cpu(sched_core_priority, cpu) = smt_prio;
+		i++;
+	}
+}
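The smt_prio = prio * smp_num_siblings / i computation above is worth a quick numeric check. A user-space toy with made-up priorities for two 2-way SMT cores:

#include <stdio.h>

int main(void)
{
	int smp_num_siblings = 2;	/* 2-way SMT */
	int core_prio[] = { 40, 36 };	/* made-up per-core ITMT priorities */

	for (int core = 0; core < 2; core++)
		for (int i = 1; i <= smp_num_siblings; i++)
			printf("core %d sibling %d -> smt_prio %d\n",
			       core, i, core_prio[core] * smp_num_siblings / i);
	/*
	 * Prints 80/40 for core 0 and 72/36 for core 1: each core's second
	 * thread ranks below both first threads here, so SMT siblings are
	 * only picked once the high-priority threads are out of capacity.
	 */
	return 0;
}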
@@ -109,6 +109,17 @@ static bool logical_packages_frozen __read_mostly;
 /* Maximum number of SMT threads on any online core */
 int __max_smt_threads __read_mostly;
 
+/* Flag to indicate if a complete sched domain rebuild is required */
+bool x86_topology_update;
+
+int arch_update_cpu_topology(void)
+{
+	int retval = x86_topology_update;
+
+	x86_topology_update = false;
+	return retval;
+}
+
 static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
 {
 	unsigned long flags;
@@ -471,22 +482,42 @@ static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
 	return false;
 }
 
+#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_MC)
+static inline int x86_sched_itmt_flags(void)
+{
+	return sysctl_sched_itmt_enabled ? SD_ASYM_PACKING : 0;
+}
+
+#ifdef CONFIG_SCHED_MC
+static int x86_core_flags(void)
+{
+	return cpu_core_flags() | x86_sched_itmt_flags();
+}
+#endif
+#ifdef CONFIG_SCHED_SMT
+static int x86_smt_flags(void)
+{
+	return cpu_smt_flags() | x86_sched_itmt_flags();
+}
+#endif
+#endif
+
 static struct sched_domain_topology_level x86_numa_in_package_topology[] = {
 #ifdef CONFIG_SCHED_SMT
-	{ cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
+	{ cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) },
 #endif
 #ifdef CONFIG_SCHED_MC
-	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
+	{ cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) },
 #endif
 	{ NULL, },
 };
 
 static struct sched_domain_topology_level x86_topology[] = {
 #ifdef CONFIG_SCHED_SMT
-	{ cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
+	{ cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) },
 #endif
 #ifdef CONFIG_SCHED_MC
-	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
+	{ cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) },
 #endif
 	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
 	{ NULL, },