Merge tag 'pm-5.2-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm

Pull power management updates from Rafael Wysocki:
 "These fix the (Intel-specific) Performance and Energy Bias Hint (EPB)
  handling and expose it to user space via sysfs, fix and clean up
  several cpufreq drivers, add support for two new chips to the qoriq
  cpufreq driver, fix, simplify and clean up the cpufreq core and the
  schedutil governor, add support for "CPU" domains to the generic power
  domains (genpd) framework and provide low-level PSCI firmware support
  for that feature, fix the exynos cpuidle driver and fix a couple of
  issues in the devfreq subsystem and clean it up.

  Specifics:

   - Fix the handling of Performance and Energy Bias Hint (EPB) on Intel
     processors and expose it to user space via sysfs (see the usage sketch
     after this list) to avoid having to access it through the generic MSR
     I/F (Rafael Wysocki).

   - Improve the handling of global turbo changes made by the platform
     firmware in the intel_pstate driver (Rafael Wysocki).

   - Convert some slow-path static_cpu_has() callers to boot_cpu_has()
     in cpufreq (Borislav Petkov).

   - Fix the frequency calculation loop in the armada-37xx cpufreq
     driver (Gregory CLEMENT).

   - Fix possible object reference leaks in multiple cpufreq drivers
     (Wen Yang).

   - Fix kerneldoc comment in the centrino cpufreq driver (dongjian).

   - Clean up the ACPI and maple cpufreq drivers (Viresh Kumar, Mohan
     Kumar).

   - Add support for lx2160a and ls1028a to the qoriq cpufreq driver
     (Vabhav Sharma, Yuantian Tang).

   - Fix kobject memory leak in the cpufreq core (Viresh Kumar).

   - Simplify the IOwait boosting in the schedutil cpufreq governor and
     rework the TSC cpufreq notifier on x86 (Rafael Wysocki).

   - Clean up the cpufreq core and statistics code (Yue Hu, Kyle Lin).

   - Improve the cpufreq documentation, add SPDX license tags to some PM
     documentation files and unify copyright notices in them (Rafael
     Wysocki).

   - Add support for "CPU" domains to the generic power domains (genpd)
     framework and provide low-level PSCI firmware support for that
     feature (Ulf Hansson).

   - Rearrange the PSCI firmware support code and add support for
     SYSTEM_RESET2 to it (Ulf Hansson, Sudeep Holla).

   - Improve genpd support for devices in multiple power domains (Ulf
     Hansson).

   - Unify target residency for the AFTR and coupled AFTR states in the
     exynos cpuidle driver (Marek Szyprowski).

   - Introduce new helper routine in the operating performance points
     (OPP) framework (Andrew-sh.Cheng).

   - Add support to the rk3399_dmc devfreq driver for passing on-die
     termination (ODT) and auto power down parameters from the kernel to
     Trusted Firmware-A (TF-A) (Enric Balletbo i Serra).

   - Add tracing to devfreq (Lukasz Luba).

   - Make the exynos-bus devfreq driver suspend all devices on system
     shutdown (Marek Szyprowski).

   - Fix a few minor issues in the devfreq subsystem and clean it up
     somewhat (Enric Balletbo i Serra, MyungJoo Ham, Rob Herring,
     Saravana Kannan, Yangtao Li).

   - Improve system wakeup diagnostics (Stephen Boyd).

   - Rework filesystem sync messages emitted during system suspend and
     hibernation (Harry Pan)"
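
As a quick illustration of the EPB sysfs interface mentioned in the first item
above, here is a minimal user-space sketch (not part of the kernel changes;
error handling abbreviated). The attribute is created per CPU by the new
intel_epb.c included below and merged into each CPU device's "power" group, so
the expected path is /sys/devices/system/cpu/cpuN/power/energy_perf_bias; the
store callback accepts either a raw value from 0 to 15 or one of the names
listed in energy_perf_strings.

/* epb_sysfs.c - minimal sketch; writing requires root. Build: cc -o epb epb_sysfs.c */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
        const char *path = "/sys/devices/system/cpu/cpu0/power/energy_perf_bias";
        char buf[32];
        FILE *f;

        /* Read the current EPB of CPU0 (printed as a number, e.g. 6 for 'normal'). */
        f = fopen(path, "r");
        if (!f || !fgets(buf, sizeof(buf), f))
                return EXIT_FAILURE;    /* no EPB support or attribute not exposed */
        printf("cpu0 energy_perf_bias: %s", buf);
        fclose(f);

        /* Update it: the kernel accepts 0..15 or a name such as "balance-power" (8). */
        f = fopen(path, "w");
        if (!f)
                return EXIT_FAILURE;
        fputs("balance-power", f);
        fclose(f);

        return EXIT_SUCCESS;
}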

* tag 'pm-5.2-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm: (72 commits)
  cpufreq: Fix kobject memleak
  cpufreq: armada-37xx: fix frequency calculation for opp
  cpufreq: centrino: Fix centrino_setpolicy() kerneldoc comment
  cpufreq: qoriq: add support for lx2160a
  x86: tsc: Rework time_cpufreq_notifier()
  PM / Domains: Allow to attach a CPU via genpd_dev_pm_attach_by_id|name()
  PM / Domains: Search for the CPU device outside the genpd lock
  PM / Domains: Drop unused in-parameter to some genpd functions
  PM / Domains: Use the base device for driver_deferred_probe_check_state()
  cpufreq: qoriq: Add ls1028a chip support
  PM / Domains: Enable genpd_dev_pm_attach_by_id|name() for single PM domain
  PM / Domains: Allow OF lookup for multi PM domain case from ->attach_dev()
  PM / Domains: Don't kfree() the virtual device in the error path
  cpufreq: Move ->get callback check outside of __cpufreq_get()
  PM / Domains: remove unnecessary unlikely()
  cpufreq: Remove needless bios_limit check in show_bios_limit()
  drivers/cpufreq/acpi-cpufreq.c: This fixes the following checkpatch warning
  firmware/psci: add support for SYSTEM_RESET2
  PM / devfreq: add tracing for scheduling work
  trace: events: add devfreq trace event file
  ...
Merged by Linus Torvalds on 2019-05-06 19:40:31 -07:00
79 changed files with 1241 additions and 391 deletions

arch/x86/kernel/cpu/Makefile

@@ -28,7 +28,7 @@ obj-y += cpuid-deps.o
 obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
-obj-$(CONFIG_CPU_SUP_INTEL) += intel.o intel_pconfig.o
+obj-$(CONFIG_CPU_SUP_INTEL) += intel.o intel_pconfig.o intel_epb.o
 obj-$(CONFIG_CPU_SUP_AMD) += amd.o
 obj-$(CONFIG_CPU_SUP_HYGON) += hygon.o
 obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o

arch/x86/kernel/cpu/common.c

@@ -1824,23 +1824,6 @@ void cpu_init(void)
 }
 #endif

-static void bsp_resume(void)
-{
-        if (this_cpu->c_bsp_resume)
-                this_cpu->c_bsp_resume(&boot_cpu_data);
-}
-
-static struct syscore_ops cpu_syscore_ops = {
-        .resume = bsp_resume,
-};
-
-static int __init init_cpu_syscore(void)
-{
-        register_syscore_ops(&cpu_syscore_ops);
-        return 0;
-}
-core_initcall(init_cpu_syscore);
-
 /*
  * The microcode loader calls this upon late microcode load to recheck features,
  * only when microcode has been updated. Caller holds microcode_mutex and CPU

arch/x86/kernel/cpu/cpu.h

@@ -14,7 +14,6 @@ struct cpu_dev {
         void (*c_init)(struct cpuinfo_x86 *);
         void (*c_identify)(struct cpuinfo_x86 *);
         void (*c_detect_tlb)(struct cpuinfo_x86 *);
-        void (*c_bsp_resume)(struct cpuinfo_x86 *);
         int c_x86_vendor;
 #ifdef CONFIG_X86_32
         /* Optional vendor specific routine to obtain the cache size. */

arch/x86/kernel/cpu/intel.c

@@ -596,36 +596,6 @@ detect_keyid_bits:
         c->x86_phys_bits -= keyid_bits;
 }

-static void init_intel_energy_perf(struct cpuinfo_x86 *c)
-{
-        u64 epb;
-
-        /*
-         * Initialize MSR_IA32_ENERGY_PERF_BIAS if not already initialized.
-         * (x86_energy_perf_policy(8) is available to change it at run-time.)
-         */
-        if (!cpu_has(c, X86_FEATURE_EPB))
-                return;
-
-        rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
-        if ((epb & 0xF) != ENERGY_PERF_BIAS_PERFORMANCE)
-                return;
-
-        pr_info_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n");
-        pr_info_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n");
-        epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL;
-        wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
-}
-
-static void intel_bsp_resume(struct cpuinfo_x86 *c)
-{
-        /*
-         * MSR_IA32_ENERGY_PERF_BIAS is lost across suspend/resume,
-         * so reinitialize it properly like during bootup:
-         */
-        init_intel_energy_perf(c);
-}
-
 static void init_cpuid_fault(struct cpuinfo_x86 *c)
 {
         u64 msr;
@@ -763,8 +733,6 @@ static void init_intel(struct cpuinfo_x86 *c)
         if (cpu_has(c, X86_FEATURE_TME))
                 detect_tme(c);

-        init_intel_energy_perf(c);
-
         init_intel_misc_features(c);
 }
@@ -1023,9 +991,7 @@ static const struct cpu_dev intel_cpu_dev = {
         .c_detect_tlb = intel_detect_tlb,
         .c_early_init = early_init_intel,
         .c_init = init_intel,
-        .c_bsp_resume = intel_bsp_resume,
         .c_x86_vendor = X86_VENDOR_INTEL,
 };
 cpu_dev_register(intel_cpu_dev);

arch/x86/kernel/cpu/intel_epb.c (new file)

@@ -0,0 +1,216 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Intel Performance and Energy Bias Hint support.
 *
 * Copyright (C) 2019 Intel Corporation
 *
 * Author:
 *        Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 */

#include <linux/cpuhotplug.h>
#include <linux/cpu.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/syscore_ops.h>
#include <linux/pm.h>

#include <asm/cpufeature.h>
#include <asm/msr.h>

/**
 * DOC: overview
 *
 * The Performance and Energy Bias Hint (EPB) allows software to specify its
 * preference with respect to the power-performance tradeoffs present in the
 * processor. Generally, the EPB is expected to be set by user space (directly
 * via sysfs or with the help of the x86_energy_perf_policy tool), but there are
 * two reasons for the kernel to update it.
 *
 * First, there are systems where the platform firmware resets the EPB during
 * system-wide transitions from sleep states back into the working state
 * effectively causing the previous EPB updates by user space to be lost.
 * Thus the kernel needs to save the current EPB values for all CPUs during
 * system-wide transitions to sleep states and restore them on the way back to
 * the working state. That can be achieved by saving EPB for secondary CPUs
 * when they are taken offline during transitions into system sleep states and
 * for the boot CPU in a syscore suspend operation, so that it can be restored
 * for the boot CPU in a syscore resume operation and for the other CPUs when
 * they are brought back online. However, CPUs that are already offline when
 * a system-wide PM transition is started are not taken offline again, but their
 * EPB values may still be reset by the platform firmware during the transition,
 * so in fact it is necessary to save the EPB of any CPU taken offline and to
 * restore it when the given CPU goes back online at all times.
 *
 * Second, on many systems the initial EPB value coming from the platform
 * firmware is 0 ('performance') and at least on some of them that is because
 * the platform firmware does not initialize EPB at all with the assumption that
 * the OS will do that anyway. That sometimes is problematic, as it may cause
 * the system battery to drain too fast, for example, so it is better to adjust
 * it on CPU bring-up and if the initial EPB value for a given CPU is 0, the
 * kernel changes it to 6 ('normal').
 */

static DEFINE_PER_CPU(u8, saved_epb);

#define EPB_MASK 0x0fULL
#define EPB_SAVED 0x10ULL
#define MAX_EPB EPB_MASK

static int intel_epb_save(void)
{
        u64 epb;

        rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
        /*
         * Ensure that saved_epb will always be nonzero after this write even if
         * the EPB value read from the MSR is 0.
         */
        this_cpu_write(saved_epb, (epb & EPB_MASK) | EPB_SAVED);

        return 0;
}

static void intel_epb_restore(void)
{
        u64 val = this_cpu_read(saved_epb);
        u64 epb;

        rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
        if (val) {
                val &= EPB_MASK;
        } else {
                /*
                 * Because intel_epb_save() has not run for the current CPU yet,
                 * it is going online for the first time, so if its EPB value is
                 * 0 ('performance') at this point, assume that it has not been
                 * initialized by the platform firmware and set it to 6
                 * ('normal').
                 */
                val = epb & EPB_MASK;
                if (val == ENERGY_PERF_BIAS_PERFORMANCE) {
                        val = ENERGY_PERF_BIAS_NORMAL;
                        pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n");
                }
        }
        wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, (epb & ~EPB_MASK) | val);
}

static struct syscore_ops intel_epb_syscore_ops = {
        .suspend = intel_epb_save,
        .resume = intel_epb_restore,
};

static const char * const energy_perf_strings[] = {
        "performance",
        "balance-performance",
        "normal",
        "balance-power",
        "power"
};

static const u8 energ_perf_values[] = {
        ENERGY_PERF_BIAS_PERFORMANCE,
        ENERGY_PERF_BIAS_BALANCE_PERFORMANCE,
        ENERGY_PERF_BIAS_NORMAL,
        ENERGY_PERF_BIAS_BALANCE_POWERSAVE,
        ENERGY_PERF_BIAS_POWERSAVE
};

static ssize_t energy_perf_bias_show(struct device *dev,
                                     struct device_attribute *attr,
                                     char *buf)
{
        unsigned int cpu = dev->id;
        u64 epb;
        int ret;

        ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
        if (ret < 0)
                return ret;

        return sprintf(buf, "%llu\n", epb);
}

static ssize_t energy_perf_bias_store(struct device *dev,
                                      struct device_attribute *attr,
                                      const char *buf, size_t count)
{
        unsigned int cpu = dev->id;
        u64 epb, val;
        int ret;

        ret = __sysfs_match_string(energy_perf_strings,
                                   ARRAY_SIZE(energy_perf_strings), buf);
        if (ret >= 0)
                val = energ_perf_values[ret];
        else if (kstrtou64(buf, 0, &val) || val > MAX_EPB)
                return -EINVAL;

        ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
        if (ret < 0)
                return ret;

        ret = wrmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS,
                            (epb & ~EPB_MASK) | val);
        if (ret < 0)
                return ret;

        return count;
}

static DEVICE_ATTR_RW(energy_perf_bias);

static struct attribute *intel_epb_attrs[] = {
        &dev_attr_energy_perf_bias.attr,
        NULL
};

static const struct attribute_group intel_epb_attr_group = {
        .name = power_group_name,
        .attrs = intel_epb_attrs
};

static int intel_epb_online(unsigned int cpu)
{
        struct device *cpu_dev = get_cpu_device(cpu);

        intel_epb_restore();
        if (!cpuhp_tasks_frozen)
                sysfs_merge_group(&cpu_dev->kobj, &intel_epb_attr_group);

        return 0;
}

static int intel_epb_offline(unsigned int cpu)
{
        struct device *cpu_dev = get_cpu_device(cpu);

        if (!cpuhp_tasks_frozen)
                sysfs_unmerge_group(&cpu_dev->kobj, &intel_epb_attr_group);

        intel_epb_save();
        return 0;
}

static __init int intel_epb_init(void)
{
        int ret;

        if (!boot_cpu_has(X86_FEATURE_EPB))
                return -ENODEV;

        ret = cpuhp_setup_state(CPUHP_AP_X86_INTEL_EPB_ONLINE,
                                "x86/intel/epb:online", intel_epb_online,
                                intel_epb_offline);
        if (ret < 0)
                goto err_out_online;

        register_syscore_ops(&intel_epb_syscore_ops);
        return 0;

err_out_online:
        cpuhp_remove_state(CPUHP_AP_X86_INTEL_EPB_ONLINE);
        return ret;
}
subsys_initcall(intel_epb_init);
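
For comparison, the "generic MSR I/F" route that the sysfs attribute above
makes unnecessary for EPB is the raw msr character device (as used by the
x86_energy_perf_policy tool). A rough user-space sketch of that route follows,
assuming the msr driver is loaded; MSR_IA32_ENERGY_PERF_BIAS is 0x1b0 and its
low four bits hold the hint.

/* epb_msr.c - sketch of the legacy MSR route; needs root and the msr module. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        uint64_t msr;
        int fd = open("/dev/cpu/0/msr", O_RDONLY);

        if (fd < 0)
                return 1;       /* msr driver not loaded or insufficient privileges */

        /* For /dev/cpu/N/msr the MSR number is passed as the pread() offset. */
        if (pread(fd, &msr, sizeof(msr), 0x1b0) != sizeof(msr)) {
                close(fd);
                return 1;
        }
        close(fd);

        printf("cpu0 EPB (MSR 0x1b0, bits 3:0): %llu\n",
               (unsigned long long)(msr & 0xf));
        return 0;
}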