- // SPDX-License-Identifier: GPL-2.0-only
- /*
- * x86 APERF/MPERF KHz calculation for
- * /sys/.../cpufreq/scaling_cur_freq
- *
- * Copyright (C) 2017 Intel Corp.
- * Author: Len Brown <[email protected]>
- */
- #include <linux/cpufreq.h>
- #include <linux/delay.h>
- #include <linux/ktime.h>
- #include <linux/math64.h>
- #include <linux/percpu.h>
- #include <linux/rcupdate.h>
- #include <linux/sched/isolation.h>
- #include <linux/sched/topology.h>
- #include <linux/smp.h>
- #include <linux/syscore_ops.h>
- #include <asm/cpu.h>
- #include <asm/cpu_device_id.h>
- #include <asm/intel-family.h>
- #include "cpu.h"
- struct aperfmperf {
- seqcount_t seq;
- unsigned long last_update;
- u64 acnt;
- u64 mcnt;
- u64 aperf;
- u64 mperf;
- };
- static DEFINE_PER_CPU_SHARED_ALIGNED(struct aperfmperf, cpu_samples) = {
- .seq = SEQCNT_ZERO(cpu_samples.seq)
- };
- static void init_counter_refs(void)
- {
- u64 aperf, mperf;
- rdmsrl(MSR_IA32_APERF, aperf);
- rdmsrl(MSR_IA32_MPERF, mperf);
- this_cpu_write(cpu_samples.aperf, aperf);
- this_cpu_write(cpu_samples.mperf, mperf);
- }
- #if defined(CONFIG_X86_64) && defined(CONFIG_SMP)
- /*
- * APERF/MPERF frequency ratio computation.
- *
- * The scheduler wants to do frequency invariant accounting and needs a <1
- * ratio to account for the 'current' frequency, corresponding to
- * freq_curr / freq_max.
- *
- * Since the frequency freq_curr on x86 is controlled by a micro-controller and
- * our P-state setting is little more than a request/hint, we need to observe
- * the effective frequency 'BusyMHz', i.e. the average frequency over a time
- * interval after discarding idle time. This is given by:
- *
- * BusyMHz = delta_APERF / delta_MPERF * freq_base
- *
- * where freq_base is the max non-turbo P-state.
- *
- * The freq_max term has to be set to a somewhat arbitrary value, because we
- * can't know which turbo states will be available at a given point in time:
- * it all depends on the thermal headroom of the entire package. We set it to
- * the turbo level with 4 cores active.
- *
- * Benchmarks show that's a good compromise between the 1C turbo ratio
- * (freq_curr/freq_max would rarely reach 1) and something close to freq_base,
- * which would ignore the entire turbo range (a conspicuous part, making
- * freq_curr/freq_max always maxed out).
- *
- * An exception to the heuristic above is the Atom uarch, where we choose the
- * highest turbo level for freq_max since Atoms are generally oriented towards
- * power efficiency.
- *
- * Setting freq_max to anything less than the 1C turbo ratio causes the ratio
- * freq_curr / freq_max to eventually grow >1, in which case we clip it to 1.
- */
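- /*
- * A worked example with hypothetical numbers (not taken from any specific
- * part): assume a base (max non-turbo) ratio of 20, i.e. 2000 MHz with a
- * 100 MHz bus clock, and a 4C turbo ratio of 36, i.e. freq_max = 3600 MHz.
- * Over one tick interval, suppose delta_APERF = 15,000,000 and
- * delta_MPERF = 10,000,000. Then:
- *
- *   BusyMHz = 15,000,000 / 10,000,000 * 2000 MHz = 3000 MHz
- *   freq_curr / freq_max = 3000 / 3600 ~= 0.83
- *
- * so the scheduler sees roughly 853 once that ratio is expressed in units
- * of SCHED_CAPACITY_SCALE (1024).
- */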
- DEFINE_STATIC_KEY_FALSE(arch_scale_freq_key);
- static u64 arch_turbo_freq_ratio = SCHED_CAPACITY_SCALE;
- static u64 arch_max_freq_ratio = SCHED_CAPACITY_SCALE;
- void arch_set_max_freq_ratio(bool turbo_disabled)
- {
- arch_max_freq_ratio = turbo_disabled ? SCHED_CAPACITY_SCALE :
- arch_turbo_freq_ratio;
- }
- EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio);
- static bool __init turbo_disabled(void)
- {
- u64 misc_en;
- int err;
- err = rdmsrl_safe(MSR_IA32_MISC_ENABLE, &misc_en);
- if (err)
- return false;
- return (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
- }
- static bool __init slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
- {
- int err;
- err = rdmsrl_safe(MSR_ATOM_CORE_RATIOS, base_freq);
- if (err)
- return false;
- err = rdmsrl_safe(MSR_ATOM_CORE_TURBO_RATIOS, turbo_freq);
- if (err)
- return false;
- *base_freq = (*base_freq >> 16) & 0x3F; /* max P state */
- *turbo_freq = *turbo_freq & 0x3F; /* 1C turbo */
- return true;
- }
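- /*
- * Illustrative decode of the two Silvermont MSR reads above, using made-up
- * register values: bits 21:16 of MSR_ATOM_CORE_RATIOS holding 0x14 give a
- * base ratio of 20, and bits 5:0 of MSR_ATOM_CORE_TURBO_RATIOS holding 0x1A
- * give a 1C turbo ratio of 26. Only the quotient 26/20 matters to the
- * scheduler, so the bus-clock multiplier cancels out.
- */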
- #define X86_MATCH(model) \
- X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, \
- INTEL_FAM6_##model, X86_FEATURE_APERFMPERF, NULL)
- static const struct x86_cpu_id has_knl_turbo_ratio_limits[] __initconst = {
- X86_MATCH(XEON_PHI_KNL),
- X86_MATCH(XEON_PHI_KNM),
- {}
- };
- static const struct x86_cpu_id has_skx_turbo_ratio_limits[] __initconst = {
- X86_MATCH(SKYLAKE_X),
- {}
- };
- static const struct x86_cpu_id has_glm_turbo_ratio_limits[] __initconst = {
- X86_MATCH(ATOM_GOLDMONT),
- X86_MATCH(ATOM_GOLDMONT_D),
- X86_MATCH(ATOM_GOLDMONT_PLUS),
- {}
- };
- static bool __init knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq,
- int num_delta_fratio)
- {
- int fratio, delta_fratio, found;
- int err, i;
- u64 msr;
- err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
- if (err)
- return false;
- *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */
- err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
- if (err)
- return false;
- fratio = (msr >> 8) & 0xFF;
- i = 16;
- found = 0;
- do {
- if (found >= num_delta_fratio) {
- *turbo_freq = fratio;
- return true;
- }
- delta_fratio = (msr >> (i + 5)) & 0x7;
- if (delta_fratio) {
- found += 1;
- fratio -= delta_fratio;
- }
- i += 8;
- } while (i < 64);
- return true;
- }
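- /*
- * Tracing the loop above with hypothetical MSR_TURBO_RATIO_LIMIT contents:
- * suppose bits 15:8 hold 34, the first delta field (bits 23:21) is 0 and
- * the second delta field (bits 31:29) is 2. With num_delta_fratio == 1 the
- * first iteration finds no delta, the second subtracts 2 (fratio becomes
- * 32), and the third iteration returns *turbo_freq = 32 because one
- * non-zero delta group has been consumed.
- */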
- static bool __init skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size)
- {
- u64 ratios, counts;
- u32 group_size;
- int err, i;
- err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
- if (err)
- return false;
- *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */
- err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratios);
- if (err)
- return false;
- err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &counts);
- if (err)
- return false;
- for (i = 0; i < 64; i += 8) {
- group_size = (counts >> i) & 0xFF;
- if (group_size >= size) {
- *turbo_freq = (ratios >> i) & 0xFF;
- return true;
- }
- }
- return false;
- }
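- /*
- * A hypothetical decode of the MSR pair above: if the group-size bytes in
- * MSR_TURBO_RATIO_LIMIT1 read 2, 4, 8, ... and the matching ratio bytes in
- * MSR_TURBO_RATIO_LIMIT read 37, 35, 33, ..., then a request for size == 4
- * skips the 2-core group and returns *turbo_freq = 35, the turbo level used
- * for the 4-cores-active heuristic described above. The GLM caller passes
- * size == 1 and therefore takes the very first group.
- */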
- static bool __init core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
- {
- u64 msr;
- int err;
- err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
- if (err)
- return false;
- err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
- if (err)
- return false;
- *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */
- *turbo_freq = (msr >> 24) & 0xFF; /* 4C turbo */
- /* The CPU may have fewer than 4 cores */
- if (!*turbo_freq)
- *turbo_freq = msr & 0xFF; /* 1C turbo */
- return true;
- }
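- /*
- * Hypothetical example for the common "core" case above: bits 15:8 of
- * MSR_PLATFORM_INFO holding 20 give base_freq = 20, and bits 31:24 of
- * MSR_TURBO_RATIO_LIMIT holding 36 give the 4C turbo ratio turbo_freq = 36.
- * On a part with fewer than four cores that byte may be 0, in which case
- * the 1C value in bits 7:0 is used instead.
- */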
- static bool __init intel_set_max_freq_ratio(void)
- {
- u64 base_freq, turbo_freq;
- u64 turbo_ratio;
- if (slv_set_max_freq_ratio(&base_freq, &turbo_freq))
- goto out;
- if (x86_match_cpu(has_glm_turbo_ratio_limits) &&
- skx_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
- goto out;
- if (x86_match_cpu(has_knl_turbo_ratio_limits) &&
- knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
- goto out;
- if (x86_match_cpu(has_skx_turbo_ratio_limits) &&
- skx_set_max_freq_ratio(&base_freq, &turbo_freq, 4))
- goto out;
- if (core_set_max_freq_ratio(&base_freq, &turbo_freq))
- goto out;
- return false;
- out:
- /*
- * Some hypervisors advertise X86_FEATURE_APERFMPERF
- * but then fill all MSRs with zeroes.
- * Some CPUs have turbo boost but don't declare any turbo ratio
- * in MSR_TURBO_RATIO_LIMIT.
- */
- if (!base_freq || !turbo_freq) {
- pr_debug("Couldn't determine cpu base or turbo frequency, necessary for scale-invariant accounting.\n");
- return false;
- }
- turbo_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE, base_freq);
- if (!turbo_ratio) {
- pr_debug("Non-zero turbo and base frequencies led to a 0 ratio.\n");
- return false;
- }
- arch_turbo_freq_ratio = turbo_ratio;
- arch_set_max_freq_ratio(turbo_disabled());
- return true;
- }
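- /*
- * Continuing the hypothetical numbers from above (base_freq = 20,
- * turbo_freq = 36), the ratio stored for the scheduler is
- *
- *   turbo_ratio = 36 * 1024 / 20 = 1843
- *
- * i.e. turbo is roughly 1.8x the base frequency, expressed in units of
- * SCHED_CAPACITY_SCALE. If turbo is disabled, arch_set_max_freq_ratio()
- * falls back to SCHED_CAPACITY_SCALE (1024), making freq_max == freq_base.
- */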
- #ifdef CONFIG_PM_SLEEP
- static struct syscore_ops freq_invariance_syscore_ops = {
- .resume = init_counter_refs,
- };
- static void register_freq_invariance_syscore_ops(void)
- {
- register_syscore_ops(&freq_invariance_syscore_ops);
- }
- #else
- static inline void register_freq_invariance_syscore_ops(void) {}
- #endif
- static void freq_invariance_enable(void)
- {
- if (static_branch_unlikely(&arch_scale_freq_key)) {
- WARN_ON_ONCE(1);
- return;
- }
- static_branch_enable(&arch_scale_freq_key);
- register_freq_invariance_syscore_ops();
- pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio);
- }
- void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled)
- {
- arch_turbo_freq_ratio = ratio;
- arch_set_max_freq_ratio(turbo_disabled);
- freq_invariance_enable();
- }
- static void __init bp_init_freq_invariance(void)
- {
- if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
- return;
- if (intel_set_max_freq_ratio())
- freq_invariance_enable();
- }
- static void disable_freq_invariance_workfn(struct work_struct *work)
- {
- int cpu;
- static_branch_disable(&arch_scale_freq_key);
- /*
- * Set arch_freq_scale to a default value on all CPUs.
- * This negates the effect of scaling.
- */
- for_each_possible_cpu(cpu)
- per_cpu(arch_freq_scale, cpu) = SCHED_CAPACITY_SCALE;
- }
- static DECLARE_WORK(disable_freq_invariance_work,
- disable_freq_invariance_workfn);
- DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
- static void scale_freq_tick(u64 acnt, u64 mcnt)
- {
- u64 freq_scale;
- if (!arch_scale_freq_invariant())
- return;
- if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt))
- goto error;
- if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt)
- goto error;
- freq_scale = div64_u64(acnt, mcnt);
- if (!freq_scale)
- goto error;
- if (freq_scale > SCHED_CAPACITY_SCALE)
- freq_scale = SCHED_CAPACITY_SCALE;
- this_cpu_write(arch_freq_scale, freq_scale);
- return;
- error:
- pr_warn("Scheduler frequency invariance went wobbly, disabling!\n");
- schedule_work(&disable_freq_invariance_work);
- }
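- /*
- * With the hypothetical numbers used above: acnt = 15,000,000,
- * mcnt = 10,000,000 and arch_max_freq_ratio = 1843. The shift by
- * 2 * SCHED_CAPACITY_SHIFT multiplies acnt by 1024 * 1024; one factor of
- * 1024 cancels the scale already contained in arch_max_freq_ratio, the
- * other leaves the result in units of SCHED_CAPACITY_SCALE:
- *
- *   freq_scale = (15,000,000 * 1024 * 1024) / (10,000,000 * 1843) ~= 853
- *
- * matching the ~0.83 busy ratio from the example further up, and clamped
- * to SCHED_CAPACITY_SCALE when the CPU runs above the 4C turbo estimate.
- */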
- #else
- static inline void bp_init_freq_invariance(void) { }
- static inline void scale_freq_tick(u64 acnt, u64 mcnt) { }
- #endif /* CONFIG_X86_64 && CONFIG_SMP */
- void arch_scale_freq_tick(void)
- {
- struct aperfmperf *s = this_cpu_ptr(&cpu_samples);
- u64 acnt, mcnt, aperf, mperf;
- if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF))
- return;
- rdmsrl(MSR_IA32_APERF, aperf);
- rdmsrl(MSR_IA32_MPERF, mperf);
- acnt = aperf - s->aperf;
- mcnt = mperf - s->mperf;
- s->aperf = aperf;
- s->mperf = mperf;
- raw_write_seqcount_begin(&s->seq);
- s->last_update = jiffies;
- s->acnt = acnt;
- s->mcnt = mcnt;
- raw_write_seqcount_end(&s->seq);
- scale_freq_tick(acnt, mcnt);
- }
- /*
- * Discard samples older than the defined maximum sample age of 20ms. There
- * is no point in sending IPIs in such a case. If the scheduler tick was
- * not running then the CPU is either idle or isolated.
- */
- #define MAX_SAMPLE_AGE ((unsigned long)HZ / 50)
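- /*
- * HZ / 50 is simply 20ms expressed in jiffies, e.g. 20 jiffies at HZ=1000
- * or 5 jiffies at HZ=250.
- */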
- unsigned int arch_freq_get_on_cpu(int cpu)
- {
- struct aperfmperf *s = per_cpu_ptr(&cpu_samples, cpu);
- unsigned int seq, freq;
- unsigned long last;
- u64 acnt, mcnt;
- if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF))
- goto fallback;
- do {
- seq = raw_read_seqcount_begin(&s->seq);
- last = s->last_update;
- acnt = s->acnt;
- mcnt = s->mcnt;
- } while (read_seqcount_retry(&s->seq, seq));
- /*
- * Bail on invalid count and when the last update was too long ago,
- * which covers idle and NOHZ full CPUs.
- */
- if (!mcnt || (jiffies - last) > MAX_SAMPLE_AGE)
- goto fallback;
- return div64_u64((cpu_khz * acnt), mcnt);
- fallback:
- freq = cpufreq_quick_get(cpu);
- return freq ? freq : cpu_khz;
- }
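- /*
- * Example of the kHz math above with the same hypothetical sample
- * (acnt / mcnt = 1.5): a part with cpu_khz = 2,000,000 reports
- * 2,000,000 * 1.5 = 3,000,000 kHz, i.e. the 3 GHz "BusyMHz" value from the
- * earlier example. When the sample is stale or APERF/MPERF is absent, the
- * cpufreq driver's quick value is used, falling back to cpu_khz itself.
- */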
- static int __init bp_init_aperfmperf(void)
- {
- if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF))
- return 0;
- init_counter_refs();
- bp_init_freq_invariance();
- return 0;
- }
- early_initcall(bp_init_aperfmperf);
- void ap_init_aperfmperf(void)
- {
- if (cpu_feature_enabled(X86_FEATURE_APERFMPERF))
- init_counter_refs();
- }