Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar:
 "Main kernel side changes:

   - Big reorganization of the x86 perf support code. The old code grew
     organically deep inside arch/x86/kernel/cpu/perf* and its naming
     became somewhat messy. The new location is under arch/x86/events/,
     using the following cleaner hierarchy of source code files:

       perf/x86: Move perf_event.c .................. => x86/events/core.c
       perf/x86: Move perf_event_amd.c .............. => x86/events/amd/core.c
       perf/x86: Move perf_event_amd_ibs.c .......... => x86/events/amd/ibs.c
       perf/x86: Move perf_event_amd_iommu.[ch] ..... => x86/events/amd/iommu.[ch]
       perf/x86: Move perf_event_amd_uncore.c ....... => x86/events/amd/uncore.c
       perf/x86: Move perf_event_intel_bts.c ........ => x86/events/intel/bts.c
       perf/x86: Move perf_event_intel.c ............ => x86/events/intel/core.c
       perf/x86: Move perf_event_intel_cqm.c ........ => x86/events/intel/cqm.c
       perf/x86: Move perf_event_intel_cstate.c ..... => x86/events/intel/cstate.c
       perf/x86: Move perf_event_intel_ds.c ......... => x86/events/intel/ds.c
       perf/x86: Move perf_event_intel_lbr.c ........ => x86/events/intel/lbr.c
       perf/x86: Move perf_event_intel_pt.[ch] ...... => x86/events/intel/pt.[ch]
       perf/x86: Move perf_event_intel_rapl.c ....... => x86/events/intel/rapl.c
       perf/x86: Move perf_event_intel_uncore.[ch] .. => x86/events/intel/uncore.[ch]
       perf/x86: Move perf_event_intel_uncore_nhmex.c => x86/events/intel/uncore_nhmex.c
       perf/x86: Move perf_event_intel_uncore_snb.c   => x86/events/intel/uncore_snb.c
       perf/x86: Move perf_event_intel_uncore_snbep.c => x86/events/intel/uncore_snbep.c
       perf/x86: Move perf_event_knc.c .............. => x86/events/intel/knc.c
       perf/x86: Move perf_event_p4.c ............... => x86/events/intel/p4.c
       perf/x86: Move perf_event_p6.c ............... => x86/events/intel/p6.c
       perf/x86: Move perf_event_msr.c .............. => x86/events/msr.c

     (Borislav Petkov)

   - Update various x86 PMU constraint and hw support details (Stephane
     Eranian)

   - Optimize kprobes for BPF execution (Martin KaFai Lau)

   - Rewrite, refactor and fix the Intel uncore PMU driver code (Thomas
     Gleixner)

   - Rewrite, refactor and fix the Intel RAPL PMU code (Thomas Gleixner)

   - Various fixes and smaller cleanups.

  There are lots of perf tooling updates as well. A few highlights:

  perf report/top:

   - Hierarchy histogram mode for 'perf top' and 'perf report', showing
     multiple levels, one per --sort entry: (Namhyung Kim)

     On a mostly idle system:

       # perf top --hierarchy -s comm,dso

     Then expand some levels and use 'P' to take a snapshot:

       # cat perf.hist.0
       -  92.32%    perf
          58.20%       perf
          22.29%       libc-2.22.so
           5.97%       [kernel]
           4.18%       libelf-0.165.so
           1.69%       [unknown]
       -   4.71%    qemu-system-x86
           3.10%       [kernel]
           1.60%       qemu-system-x86_64 (deleted)
       +   2.97%    swapper
       #

   - Add 'L' hotkey to dynamically set the percent threshold for
     histogram entries and callchains, i.e. dynamically do what the
     --percent-limit command line option to 'top' and 'report' does.
     (Namhyung Kim)

  perf mem:

   - Allow specifying events via -e in 'perf mem record', also listing
     what events can be specified via 'perf mem record -e list' (Jiri
     Olsa)

  perf record:

   - Add 'perf record' --all-user/--all-kernel options, so that one can
     tell that all the events in the command line should be restricted
     to the user or kernel levels (Jiri Olsa), i.e.:

       perf record -e cycles:u,instructions:u

     is equivalent to:

       perf record --all-user -e cycles,instructions

   - Make 'perf record' collect CPU cache info in the perf.data file header:

       $ perf record usleep 1
       [ perf record: Woken up 1 times to write data ]
       [ perf record: Captured and wrote 0.017 MB perf.data (7 samples) ]
       $ perf report --header-only -I | tail -10 | head -8
       # CPU cache info:
       #  L1 Data          32K [0-1]
       #  L1 Instruction   32K [0-1]
       #  L1 Data          32K [2-3]
       #  L1 Instruction   32K [2-3]
       #  L2 Unified      256K [0-1]
       #  L2 Unified      256K [2-3]
       #  L3 Unified     4096K [0-3]

     Will be used in 'perf c2c' and eventually in 'perf diff' to allow,
     for instance, running the same workload in multiple machines and
     then, when using 'diff', showing the hardware difference. (Jiri
     Olsa)

   - Improved support for Java, using the JVMTI agent library to do
     jitdumps that then will be inserted in synthesized
     PERF_RECORD_MMAP2 events via 'perf inject' pointed to synthesized
     ELF files stored in ~/.debug and keyed with build-ids, to allow
     symbol resolution and even annotation with source line info, see
     the changeset comments to see how to use it (Stephane Eranian)

  perf script/trace:

   - Decode data_src values (e.g. perf.data files generated by 'perf
     mem record') in 'perf script': (Jiri Olsa)

       # perf script
         perf 693 [1] 4.088652: 1 cpu/mem-loads,ldlat=30/P: ffff88007d0b0f40 68100142 L1 hit|SNP None|TLB L1 or L2 hit|LCK No
                                                                     <SNIP>
                                                            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

   - Improve support to 'data_src', 'weight' and 'addr' fields in
     'perf script' (Jiri Olsa)

   - Handle empty print fmts in 'perf script -s' i.e. when running
     python or perl scripts (Taeung Song)

  perf stat:

   - 'perf stat' now shows shadow metrics (insn per cycle, etc) in
     interval mode too. E.g:

       # perf stat -I 1000 -e instructions,cycles sleep 1
       #         time    counts unit events
        1.000215928    519,620      instructions  #  0.69 insn per cycle
        1.000215928    752,003      cycles
       <SNIP>

   - Port 'perf kvm stat' to PowerPC (Hemant Kumar)

   - Implement CSV metrics output in 'perf stat' (Andi Kleen)

  perf BPF support:

   - Support converting data from bpf events in 'perf data' (Wang Nan)

   - Print bpf-output events in 'perf script': (Wang Nan)

       # perf record -e bpf-output/no-inherit,name=evt/ -e ./test_bpf_output_3.c/map:channel.event=evt/ usleep 1000
       # perf script
          usleep  4882 21384.532523: evt: ffffffff810e97d1 sys_nanosleep ([kernel.kallsyms])
           BPF output: 0000: 52 61 69 73 65 20 61 20  Raise a
                       0008: 42 50 46 20 65 76 65 6e  BPF even
                       0010: 74 21 00 00              t!..
           BPF string: "Raise a BPF event!"
       #

   - Add API to set values of map entries in a BPF object, be it
     individual map slots or ranges (Wang Nan)

   - Introduce support for the 'bpf-output' event (Wang Nan)

   - Add glue to read perf events in a BPF program (Wang Nan)

   - Improve support for bpf-output events in 'perf trace' (Wang Nan)

  ... and tons of other changes as well - see the shortlog and git log
  for details!"
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (342 commits)
  perf stat: Add --metric-only support for -A
  perf stat: Implement --metric-only mode
  perf stat: Document CSV format in manpage
  perf hists browser: Check sort keys before hot key actions
  perf hists browser: Allow thread filtering for comm sort key
  perf tools: Add sort__has_comm variable
  perf tools: Recalc total periods using top-level entries in hierarchy
  perf tools: Remove nr_sort_keys field
  perf hists browser: Cleanup hist_browser__fprintf_hierarchy_entry()
  perf tools: Remove hist_entry->fmt field
  perf tools: Fix command line filters in hierarchy mode
  perf tools: Add more sort entry check functions
  perf tools: Fix hist_entry__filter() for hierarchy
  perf jitdump: Build only on supported archs
  tools lib traceevent: Add '~' operation within arg_num_eval()
  perf tools: Omit unnecessary cast in perf_pmu__parse_scale
  perf tools: Pass perf_hpp_list all the way through setup_sort_list
  perf tools: Fix perf script python database export crash
  perf jitdump: DWARF is also needed
  perf bench mem: Prepare the x86-64 build for upstream memcpy_mcsafe() changes
  ...
@@ -2077,6 +2077,20 @@ int generic_processor_info(int apicid, int version)
|
||||
} else
|
||||
cpu = cpumask_next_zero(-1, cpu_present_mask);
|
||||
|
||||
/*
|
||||
* This can happen on physical hotplug. The sanity check at boot time
|
||||
* is done from native_smp_prepare_cpus() after num_possible_cpus() is
|
||||
* established.
|
||||
*/
|
||||
if (topology_update_package_map(apicid, cpu) < 0) {
|
||||
int thiscpu = max + disabled_cpus;
|
||||
|
||||
pr_warning("ACPI: Package limit reached. Processor %d/0x%x ignored.\n",
|
||||
thiscpu, apicid);
|
||||
disabled_cpus++;
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
/*
|
||||
* Validate version
|
||||
*/
|
||||
|
@@ -30,33 +30,11 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
|
||||
obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
|
||||
obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
|
||||
|
||||
obj-$(CONFIG_PERF_EVENTS) += perf_event.o
|
||||
|
||||
ifdef CONFIG_PERF_EVENTS
|
||||
obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o perf_event_amd_uncore.o
|
||||
ifdef CONFIG_AMD_IOMMU
|
||||
obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o
|
||||
endif
|
||||
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o
|
||||
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
|
||||
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_rapl.o perf_event_intel_cqm.o
|
||||
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_pt.o perf_event_intel_bts.o
|
||||
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_cstate.o
|
||||
|
||||
obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += perf_event_intel_uncore.o \
|
||||
perf_event_intel_uncore_snb.o \
|
||||
perf_event_intel_uncore_snbep.o \
|
||||
perf_event_intel_uncore_nhmex.o
|
||||
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_msr.o
|
||||
obj-$(CONFIG_CPU_SUP_AMD) += perf_event_msr.o
|
||||
endif
|
||||
|
||||
|
||||
obj-$(CONFIG_X86_MCE) += mcheck/
|
||||
obj-$(CONFIG_MTRR) += mtrr/
|
||||
obj-$(CONFIG_MICROCODE) += microcode/
|
||||
|
||||
obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o perf_event_amd_ibs.o
|
||||
obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
|
||||
|
||||
obj-$(CONFIG_HYPERVISOR_GUEST) += vmware.o hypervisor.o mshyperv.o
|
||||
|
||||
|
@@ -117,7 +117,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
|
||||
void (*f_vide)(void);
|
||||
u64 d, d2;
|
||||
|
||||
printk(KERN_INFO "AMD K6 stepping B detected - ");
|
||||
pr_info("AMD K6 stepping B detected - ");
|
||||
|
||||
/*
|
||||
* It looks like AMD fixed the 2.6.2 bug and improved indirect
|
||||
@@ -133,10 +133,9 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
|
||||
d = d2-d;
|
||||
|
||||
if (d > 20*K6_BUG_LOOP)
|
||||
printk(KERN_CONT
|
||||
"system stability may be impaired when more than 32 MB are used.\n");
|
||||
pr_cont("system stability may be impaired when more than 32 MB are used.\n");
|
||||
else
|
||||
printk(KERN_CONT "probably OK (after B9730xxxx).\n");
|
||||
pr_cont("probably OK (after B9730xxxx).\n");
|
||||
}
|
||||
|
||||
/* K6 with old style WHCR */
|
||||
@@ -154,7 +153,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
|
||||
wbinvd();
|
||||
wrmsr(MSR_K6_WHCR, l, h);
|
||||
local_irq_restore(flags);
|
||||
printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n",
|
||||
pr_info("Enabling old style K6 write allocation for %d Mb\n",
|
||||
mbytes);
|
||||
}
|
||||
return;
|
||||
@@ -175,7 +174,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
|
||||
wbinvd();
|
||||
wrmsr(MSR_K6_WHCR, l, h);
|
||||
local_irq_restore(flags);
|
||||
printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n",
|
||||
pr_info("Enabling new style K6 write allocation for %d Mb\n",
|
||||
mbytes);
|
||||
}
|
||||
|
||||
@@ -202,7 +201,7 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
|
||||
*/
|
||||
if (c->x86_model >= 6 && c->x86_model <= 10) {
|
||||
if (!cpu_has(c, X86_FEATURE_XMM)) {
|
||||
printk(KERN_INFO "Enabling disabled K7/SSE Support.\n");
|
||||
pr_info("Enabling disabled K7/SSE Support.\n");
|
||||
msr_clear_bit(MSR_K7_HWCR, 15);
|
||||
set_cpu_cap(c, X86_FEATURE_XMM);
|
||||
}
|
||||
@@ -216,9 +215,8 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
|
||||
if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
|
||||
rdmsr(MSR_K7_CLK_CTL, l, h);
|
||||
if ((l & 0xfff00000) != 0x20000000) {
|
||||
printk(KERN_INFO
|
||||
"CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
|
||||
l, ((l & 0x000fffff)|0x20000000));
|
||||
pr_info("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
|
||||
l, ((l & 0x000fffff)|0x20000000));
|
||||
wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h);
|
||||
}
|
||||
}
|
||||
@@ -485,7 +483,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
|
||||
if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
|
||||
unsigned long pfn = tseg >> PAGE_SHIFT;
|
||||
|
||||
printk(KERN_DEBUG "tseg: %010llx\n", tseg);
|
||||
pr_debug("tseg: %010llx\n", tseg);
|
||||
if (pfn_range_is_mapped(pfn, pfn + 1))
|
||||
set_memory_4k((unsigned long)__va(tseg), 1);
|
||||
}
|
||||
@@ -500,8 +498,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
|
||||
|
||||
rdmsrl(MSR_K7_HWCR, val);
|
||||
if (!(val & BIT(24)))
|
||||
printk(KERN_WARNING FW_BUG "TSC doesn't count "
|
||||
"with P0 frequency!\n");
|
||||
pr_warn(FW_BUG "TSC doesn't count with P0 frequency!\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -15,7 +15,7 @@ void __init check_bugs(void)
|
||||
{
|
||||
identify_boot_cpu();
|
||||
#if !defined(CONFIG_SMP)
|
||||
printk(KERN_INFO "CPU: ");
|
||||
pr_info("CPU: ");
|
||||
print_cpu_info(&boot_cpu_data);
|
||||
#endif
|
||||
alternative_instructions();
|
||||
|
@@ -29,7 +29,7 @@ static void init_c3(struct cpuinfo_x86 *c)
|
||||
rdmsr(MSR_VIA_FCR, lo, hi);
|
||||
lo |= ACE_FCR; /* enable ACE unit */
|
||||
wrmsr(MSR_VIA_FCR, lo, hi);
|
||||
printk(KERN_INFO "CPU: Enabled ACE h/w crypto\n");
|
||||
pr_info("CPU: Enabled ACE h/w crypto\n");
|
||||
}
|
||||
|
||||
/* enable RNG unit, if present and disabled */
|
||||
@@ -37,7 +37,7 @@ static void init_c3(struct cpuinfo_x86 *c)
|
||||
rdmsr(MSR_VIA_RNG, lo, hi);
|
||||
lo |= RNG_ENABLE; /* enable RNG unit */
|
||||
wrmsr(MSR_VIA_RNG, lo, hi);
|
||||
printk(KERN_INFO "CPU: Enabled h/w RNG\n");
|
||||
pr_info("CPU: Enabled h/w RNG\n");
|
||||
}
|
||||
|
||||
/* store Centaur Extended Feature Flags as
|
||||
@@ -130,7 +130,7 @@ static void init_centaur(struct cpuinfo_x86 *c)
|
||||
name = "C6";
|
||||
fcr_set = ECX8|DSMC|EDCTLB|EMMX|ERETSTK;
|
||||
fcr_clr = DPDC;
|
||||
printk(KERN_NOTICE "Disabling bugged TSC.\n");
|
||||
pr_notice("Disabling bugged TSC.\n");
|
||||
clear_cpu_cap(c, X86_FEATURE_TSC);
|
||||
break;
|
||||
case 8:
|
||||
@@ -163,11 +163,11 @@ static void init_centaur(struct cpuinfo_x86 *c)
|
||||
newlo = (lo|fcr_set) & (~fcr_clr);
|
||||
|
||||
if (newlo != lo) {
|
||||
printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n",
|
||||
pr_info("Centaur FCR was 0x%X now 0x%X\n",
|
||||
lo, newlo);
|
||||
wrmsr(MSR_IDT_FCR1, newlo, hi);
|
||||
} else {
|
||||
printk(KERN_INFO "Centaur FCR is 0x%X\n", lo);
|
||||
pr_info("Centaur FCR is 0x%X\n", lo);
|
||||
}
|
||||
/* Emulate MTRRs using Centaur's MCR. */
|
||||
set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);
|
||||
|
@@ -228,7 +228,7 @@ static void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
|
||||
lo |= 0x200000;
|
||||
wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
|
||||
|
||||
printk(KERN_NOTICE "CPU serial number disabled.\n");
|
||||
pr_notice("CPU serial number disabled.\n");
|
||||
clear_cpu_cap(c, X86_FEATURE_PN);
|
||||
|
||||
/* Disabling the serial number may affect the cpuid level */
|
||||
@@ -329,9 +329,8 @@ static void filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
|
||||
if (!warn)
|
||||
continue;
|
||||
|
||||
printk(KERN_WARNING
|
||||
"CPU: CPU feature " X86_CAP_FMT " disabled, no CPUID level 0x%x\n",
|
||||
x86_cap_flag(df->feature), df->level);
|
||||
pr_warn("CPU: CPU feature " X86_CAP_FMT " disabled, no CPUID level 0x%x\n",
|
||||
x86_cap_flag(df->feature), df->level);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -510,7 +509,7 @@ void detect_ht(struct cpuinfo_x86 *c)
|
||||
smp_num_siblings = (ebx & 0xff0000) >> 16;
|
||||
|
||||
if (smp_num_siblings == 1) {
|
||||
printk_once(KERN_INFO "CPU0: Hyper-Threading is disabled\n");
|
||||
pr_info_once("CPU0: Hyper-Threading is disabled\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -531,10 +530,10 @@ void detect_ht(struct cpuinfo_x86 *c)
|
||||
|
||||
out:
|
||||
if (!printed && (c->x86_max_cores * smp_num_siblings) > 1) {
|
||||
printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
|
||||
c->phys_proc_id);
|
||||
printk(KERN_INFO "CPU: Processor Core ID: %d\n",
|
||||
c->cpu_core_id);
|
||||
pr_info("CPU: Physical Processor ID: %d\n",
|
||||
c->phys_proc_id);
|
||||
pr_info("CPU: Processor Core ID: %d\n",
|
||||
c->cpu_core_id);
|
||||
printed = 1;
|
||||
}
|
||||
#endif
|
||||
@@ -559,9 +558,8 @@ static void get_cpu_vendor(struct cpuinfo_x86 *c)
|
||||
}
|
||||
}
|
||||
|
||||
printk_once(KERN_ERR
|
||||
"CPU: vendor_id '%s' unknown, using generic init.\n" \
|
||||
"CPU: Your system may be unstable.\n", v);
|
||||
pr_err_once("CPU: vendor_id '%s' unknown, using generic init.\n" \
|
||||
"CPU: Your system may be unstable.\n", v);
|
||||
|
||||
c->x86_vendor = X86_VENDOR_UNKNOWN;
|
||||
this_cpu = &default_cpu;
|
||||
@@ -760,7 +758,7 @@ void __init early_cpu_init(void)
|
||||
int count = 0;
|
||||
|
||||
#ifdef CONFIG_PROCESSOR_SELECT
|
||||
printk(KERN_INFO "KERNEL supported cpus:\n");
|
||||
pr_info("KERNEL supported cpus:\n");
|
||||
#endif
|
||||
|
||||
for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
|
||||
@@ -778,7 +776,7 @@ void __init early_cpu_init(void)
|
||||
for (j = 0; j < 2; j++) {
|
||||
if (!cpudev->c_ident[j])
|
||||
continue;
|
||||
printk(KERN_INFO " %s %s\n", cpudev->c_vendor,
|
||||
pr_info(" %s %s\n", cpudev->c_vendor,
|
||||
cpudev->c_ident[j]);
|
||||
}
|
||||
}
|
||||
@@ -977,6 +975,8 @@ static void identify_cpu(struct cpuinfo_x86 *c)
|
||||
#ifdef CONFIG_NUMA
|
||||
numa_add_cpu(smp_processor_id());
|
||||
#endif
|
||||
/* The boot/hotplug time assigment got cleared, restore it */
|
||||
c->logical_proc_id = topology_phys_to_logical_pkg(c->phys_proc_id);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1061,7 +1061,7 @@ static void __print_cpu_msr(void)
|
||||
for (index = index_min; index < index_max; index++) {
|
||||
if (rdmsrl_safe(index, &val))
|
||||
continue;
|
||||
printk(KERN_INFO " MSR%08x: %016llx\n", index, val);
|
||||
pr_info(" MSR%08x: %016llx\n", index, val);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1100,19 +1100,19 @@ void print_cpu_info(struct cpuinfo_x86 *c)
|
||||
}
|
||||
|
||||
if (vendor && !strstr(c->x86_model_id, vendor))
|
||||
printk(KERN_CONT "%s ", vendor);
|
||||
pr_cont("%s ", vendor);
|
||||
|
||||
if (c->x86_model_id[0])
|
||||
printk(KERN_CONT "%s", c->x86_model_id);
|
||||
pr_cont("%s", c->x86_model_id);
|
||||
else
|
||||
printk(KERN_CONT "%d86", c->x86);
|
||||
pr_cont("%d86", c->x86);
|
||||
|
||||
printk(KERN_CONT " (family: 0x%x, model: 0x%x", c->x86, c->x86_model);
|
||||
pr_cont(" (family: 0x%x, model: 0x%x", c->x86, c->x86_model);
|
||||
|
||||
if (c->x86_mask || c->cpuid_level >= 0)
|
||||
printk(KERN_CONT ", stepping: 0x%x)\n", c->x86_mask);
|
||||
pr_cont(", stepping: 0x%x)\n", c->x86_mask);
|
||||
else
|
||||
printk(KERN_CONT ")\n");
|
||||
pr_cont(")\n");
|
||||
|
||||
print_cpu_msr(c);
|
||||
}
|
||||
@@ -1438,7 +1438,7 @@ void cpu_init(void)
|
||||
|
||||
show_ucode_info_early();
|
||||
|
||||
printk(KERN_INFO "Initializing CPU#%d\n", cpu);
|
||||
pr_info("Initializing CPU#%d\n", cpu);
|
||||
|
||||
if (cpu_feature_enabled(X86_FEATURE_VME) ||
|
||||
cpu_has_tsc ||
|
||||
|
@@ -103,7 +103,7 @@ static void check_cx686_slop(struct cpuinfo_x86 *c)
|
||||
local_irq_restore(flags);
|
||||
|
||||
if (ccr5 & 2) { /* possible wrong calibration done */
|
||||
printk(KERN_INFO "Recalibrating delay loop with SLOP bit reset\n");
|
||||
pr_info("Recalibrating delay loop with SLOP bit reset\n");
|
||||
calibrate_delay();
|
||||
c->loops_per_jiffy = loops_per_jiffy;
|
||||
}
|
||||
@@ -115,7 +115,7 @@ static void set_cx86_reorder(void)
|
||||
{
|
||||
u8 ccr3;
|
||||
|
||||
printk(KERN_INFO "Enable Memory access reorder on Cyrix/NSC processor.\n");
|
||||
pr_info("Enable Memory access reorder on Cyrix/NSC processor.\n");
|
||||
ccr3 = getCx86(CX86_CCR3);
|
||||
setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
|
||||
|
||||
@@ -128,7 +128,7 @@ static void set_cx86_reorder(void)
|
||||
|
||||
static void set_cx86_memwb(void)
|
||||
{
|
||||
printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
|
||||
pr_info("Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
|
||||
|
||||
/* CCR2 bit 2: unlock NW bit */
|
||||
setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) & ~0x04);
|
||||
@@ -268,7 +268,7 @@ static void init_cyrix(struct cpuinfo_x86 *c)
|
||||
* VSA1 we work around however.
|
||||
*/
|
||||
|
||||
printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n");
|
||||
pr_info("Working around Cyrix MediaGX virtual DMA bugs.\n");
|
||||
isa_dma_bridge_buggy = 2;
|
||||
|
||||
/* We do this before the PCI layer is running. However we
|
||||
@@ -426,7 +426,7 @@ static void cyrix_identify(struct cpuinfo_x86 *c)
|
||||
if (dir0 == 5 || dir0 == 3) {
|
||||
unsigned char ccr3;
|
||||
unsigned long flags;
|
||||
printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n");
|
||||
pr_info("Enabling CPUID on Cyrix processor.\n");
|
||||
local_irq_save(flags);
|
||||
ccr3 = getCx86(CX86_CCR3);
|
||||
/* enable MAPEN */
|
||||
|
@@ -56,7 +56,7 @@ detect_hypervisor_vendor(void)
|
||||
}
|
||||
|
||||
if (max_pri)
|
||||
printk(KERN_INFO "Hypervisor detected: %s\n", x86_hyper->name);
|
||||
pr_info("Hypervisor detected: %s\n", x86_hyper->name);
|
||||
}
|
||||
|
||||
void init_hypervisor(struct cpuinfo_x86 *c)
|
||||
|
@@ -61,7 +61,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
|
||||
*/
|
||||
if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 &&
|
||||
c->microcode < 0x20e) {
|
||||
printk(KERN_WARNING "Atom PSE erratum detected, BIOS microcode update recommended\n");
|
||||
pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n");
|
||||
clear_cpu_cap(c, X86_FEATURE_PSE);
|
||||
}
|
||||
|
||||
@@ -140,7 +140,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
|
||||
if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
|
||||
rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
|
||||
if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) {
|
||||
printk(KERN_INFO "Disabled fast string operations\n");
|
||||
pr_info("Disabled fast string operations\n");
|
||||
setup_clear_cpu_cap(X86_FEATURE_REP_GOOD);
|
||||
setup_clear_cpu_cap(X86_FEATURE_ERMS);
|
||||
}
|
||||
@@ -160,6 +160,19 @@ static void early_init_intel(struct cpuinfo_x86 *c)
|
||||
pr_info("Disabling PGE capability bit\n");
|
||||
setup_clear_cpu_cap(X86_FEATURE_PGE);
|
||||
}
|
||||
|
||||
if (c->cpuid_level >= 0x00000001) {
|
||||
u32 eax, ebx, ecx, edx;
|
||||
|
||||
cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
|
||||
/*
|
||||
* If HTT (EDX[28]) is set EBX[16:23] contain the number of
|
||||
* apicids which are reserved per package. Store the resulting
|
||||
* shift value for the package management code.
|
||||
*/
|
||||
if (edx & (1U << 28))
|
||||
c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
@@ -176,7 +189,7 @@ int ppro_with_ram_bug(void)
|
||||
boot_cpu_data.x86 == 6 &&
|
||||
boot_cpu_data.x86_model == 1 &&
|
||||
boot_cpu_data.x86_mask < 8) {
|
||||
printk(KERN_INFO "Pentium Pro with Errata#50 detected. Taking evasive action.\n");
|
||||
pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n");
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
@@ -225,7 +238,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
|
||||
|
||||
set_cpu_bug(c, X86_BUG_F00F);
|
||||
if (!f00f_workaround_enabled) {
|
||||
printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n");
|
||||
pr_notice("Intel Pentium with F0 0F bug - workaround enabled.\n");
|
||||
f00f_workaround_enabled = 1;
|
||||
}
|
||||
}
|
||||
@@ -244,7 +257,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
|
||||
* Forcefully enable PAE if kernel parameter "forcepae" is present.
|
||||
*/
|
||||
if (forcepae) {
|
||||
printk(KERN_WARNING "PAE forced!\n");
|
||||
pr_warn("PAE forced!\n");
|
||||
set_cpu_cap(c, X86_FEATURE_PAE);
|
||||
add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_NOW_UNRELIABLE);
|
||||
}
|
||||
|
@@ -444,7 +444,7 @@ static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
|
||||
err = amd_set_l3_disable_slot(nb, cpu, slot, val);
|
||||
if (err) {
|
||||
if (err == -EEXIST)
|
||||
pr_warning("L3 slot %d in use/index already disabled!\n",
|
||||
pr_warn("L3 slot %d in use/index already disabled!\n",
|
||||
slot);
|
||||
return err;
|
||||
}
|
||||
|
@@ -1,116 +0,0 @@
|
||||
/*
|
||||
* Intel(R) Processor Trace PMU driver for perf
|
||||
* Copyright (c) 2013-2014, Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*
|
||||
* Intel PT is specified in the Intel Architecture Instruction Set Extensions
|
||||
* Programming Reference:
|
||||
* http://software.intel.com/en-us/intel-isa-extensions
|
||||
*/
|
||||
|
||||
#ifndef __INTEL_PT_H__
|
||||
#define __INTEL_PT_H__
|
||||
|
||||
/*
|
||||
* Single-entry ToPA: when this close to region boundary, switch
|
||||
* buffers to avoid losing data.
|
||||
*/
|
||||
#define TOPA_PMI_MARGIN 512
|
||||
|
||||
#define TOPA_SHIFT 12
|
||||
|
||||
static inline unsigned int sizes(unsigned int tsz)
|
||||
{
|
||||
return 1 << (tsz + TOPA_SHIFT);
|
||||
};
|
||||
|
||||
struct topa_entry {
|
||||
u64 end : 1;
|
||||
u64 rsvd0 : 1;
|
||||
u64 intr : 1;
|
||||
u64 rsvd1 : 1;
|
||||
u64 stop : 1;
|
||||
u64 rsvd2 : 1;
|
||||
u64 size : 4;
|
||||
u64 rsvd3 : 2;
|
||||
u64 base : 36;
|
||||
u64 rsvd4 : 16;
|
||||
};
|
||||
|
||||
#define PT_CPUID_LEAVES 2
|
||||
#define PT_CPUID_REGS_NUM 4 /* number of regsters (eax, ebx, ecx, edx) */
|
||||
|
||||
enum pt_capabilities {
|
||||
PT_CAP_max_subleaf = 0,
|
||||
PT_CAP_cr3_filtering,
|
||||
PT_CAP_psb_cyc,
|
||||
PT_CAP_mtc,
|
||||
PT_CAP_topa_output,
|
||||
PT_CAP_topa_multiple_entries,
|
||||
PT_CAP_single_range_output,
|
||||
PT_CAP_payloads_lip,
|
||||
PT_CAP_mtc_periods,
|
||||
PT_CAP_cycle_thresholds,
|
||||
PT_CAP_psb_periods,
|
||||
};
|
||||
|
||||
struct pt_pmu {
|
||||
struct pmu pmu;
|
||||
u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
|
||||
};
|
||||
|
||||
/**
|
||||
* struct pt_buffer - buffer configuration; one buffer per task_struct or
|
||||
* cpu, depending on perf event configuration
|
||||
* @cpu: cpu for per-cpu allocation
|
||||
* @tables: list of ToPA tables in this buffer
|
||||
* @first: shorthand for first topa table
|
||||
* @last: shorthand for last topa table
|
||||
* @cur: current topa table
|
||||
* @nr_pages: buffer size in pages
|
||||
* @cur_idx: current output region's index within @cur table
|
||||
* @output_off: offset within the current output region
|
||||
* @data_size: running total of the amount of data in this buffer
|
||||
* @lost: if data was lost/truncated
|
||||
* @head: logical write offset inside the buffer
|
||||
* @snapshot: if this is for a snapshot/overwrite counter
|
||||
* @stop_pos: STOP topa entry in the buffer
|
||||
* @intr_pos: INT topa entry in the buffer
|
||||
* @data_pages: array of pages from perf
|
||||
* @topa_index: table of topa entries indexed by page offset
|
||||
*/
|
||||
struct pt_buffer {
|
||||
int cpu;
|
||||
struct list_head tables;
|
||||
struct topa *first, *last, *cur;
|
||||
unsigned int cur_idx;
|
||||
size_t output_off;
|
||||
unsigned long nr_pages;
|
||||
local_t data_size;
|
||||
local_t lost;
|
||||
local64_t head;
|
||||
bool snapshot;
|
||||
unsigned long stop_pos, intr_pos;
|
||||
void **data_pages;
|
||||
struct topa_entry *topa_index[0];
|
||||
};
|
||||
|
||||
/**
|
||||
* struct pt - per-cpu pt context
|
||||
* @handle: perf output handle
|
||||
* @handle_nmi: do handle PT PMI on this cpu, there's an active event
|
||||
*/
|
||||
struct pt {
|
||||
struct perf_output_handle handle;
|
||||
int handle_nmi;
|
||||
};
|
||||
|
||||
#endif /* __INTEL_PT_H__ */
|
@@ -115,7 +115,7 @@ static int raise_local(void)
|
||||
int cpu = m->extcpu;
|
||||
|
||||
if (m->inject_flags & MCJ_EXCEPTION) {
|
||||
printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu);
|
||||
pr_info("Triggering MCE exception on CPU %d\n", cpu);
|
||||
switch (context) {
|
||||
case MCJ_CTX_IRQ:
|
||||
/*
|
||||
@@ -128,15 +128,15 @@ static int raise_local(void)
|
||||
raise_exception(m, NULL);
|
||||
break;
|
||||
default:
|
||||
printk(KERN_INFO "Invalid MCE context\n");
|
||||
pr_info("Invalid MCE context\n");
|
||||
ret = -EINVAL;
|
||||
}
|
||||
printk(KERN_INFO "MCE exception done on CPU %d\n", cpu);
|
||||
pr_info("MCE exception done on CPU %d\n", cpu);
|
||||
} else if (m->status) {
|
||||
printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu);
|
||||
pr_info("Starting machine check poll CPU %d\n", cpu);
|
||||
raise_poll(m);
|
||||
mce_notify_irq();
|
||||
printk(KERN_INFO "Machine check poll done on CPU %d\n", cpu);
|
||||
pr_info("Machine check poll done on CPU %d\n", cpu);
|
||||
} else
|
||||
m->finished = 0;
|
||||
|
||||
@@ -183,8 +183,7 @@ static void raise_mce(struct mce *m)
|
||||
start = jiffies;
|
||||
while (!cpumask_empty(mce_inject_cpumask)) {
|
||||
if (!time_before(jiffies, start + 2*HZ)) {
|
||||
printk(KERN_ERR
|
||||
"Timeout waiting for mce inject %lx\n",
|
||||
pr_err("Timeout waiting for mce inject %lx\n",
|
||||
*cpumask_bits(mce_inject_cpumask));
|
||||
break;
|
||||
}
|
||||
@@ -241,7 +240,7 @@ static int inject_init(void)
|
||||
{
|
||||
if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
|
||||
return -ENOMEM;
|
||||
printk(KERN_INFO "Machine check injector initialized\n");
|
||||
pr_info("Machine check injector initialized\n");
|
||||
register_mce_write_callback(mce_write);
|
||||
register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0,
|
||||
"mce_notify");
|
||||
|
@@ -26,14 +26,12 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code)
|
||||
rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
|
||||
rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
|
||||
|
||||
printk(KERN_EMERG
|
||||
"CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n",
|
||||
smp_processor_id(), loaddr, lotype);
|
||||
pr_emerg("CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n",
|
||||
smp_processor_id(), loaddr, lotype);
|
||||
|
||||
if (lotype & (1<<5)) {
|
||||
printk(KERN_EMERG
|
||||
"CPU#%d: Possible thermal failure (CPU on fire ?).\n",
|
||||
smp_processor_id());
|
||||
pr_emerg("CPU#%d: Possible thermal failure (CPU on fire ?).\n",
|
||||
smp_processor_id());
|
||||
}
|
||||
|
||||
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
|
||||
@@ -61,12 +59,10 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
|
||||
/* Read registers before enabling: */
|
||||
rdmsr(MSR_IA32_P5_MC_ADDR, l, h);
|
||||
rdmsr(MSR_IA32_P5_MC_TYPE, l, h);
|
||||
printk(KERN_INFO
|
||||
"Intel old style machine check architecture supported.\n");
|
||||
pr_info("Intel old style machine check architecture supported.\n");
|
||||
|
||||
/* Enable MCE: */
|
||||
cr4_set_bits(X86_CR4_MCE);
|
||||
printk(KERN_INFO
|
||||
"Intel old style machine check reporting enabled on CPU#%d.\n",
|
||||
smp_processor_id());
|
||||
pr_info("Intel old style machine check reporting enabled on CPU#%d.\n",
|
||||
smp_processor_id());
|
||||
}
|
||||
|
@@ -190,7 +190,7 @@ static int therm_throt_process(bool new_event, int event, int level)
|
||||
/* if we just entered the thermal event */
|
||||
if (new_event) {
|
||||
if (event == THERMAL_THROTTLING_EVENT)
|
||||
printk(KERN_CRIT "CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n",
|
||||
pr_crit("CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n",
|
||||
this_cpu,
|
||||
level == CORE_LEVEL ? "Core" : "Package",
|
||||
state->count);
|
||||
@@ -198,8 +198,7 @@ static int therm_throt_process(bool new_event, int event, int level)
|
||||
}
|
||||
if (old_event) {
|
||||
if (event == THERMAL_THROTTLING_EVENT)
|
||||
printk(KERN_INFO "CPU%d: %s temperature/speed normal\n",
|
||||
this_cpu,
|
||||
pr_info("CPU%d: %s temperature/speed normal\n", this_cpu,
|
||||
level == CORE_LEVEL ? "Core" : "Package");
|
||||
return 1;
|
||||
}
|
||||
@@ -417,8 +416,8 @@ static void intel_thermal_interrupt(void)
|
||||
|
||||
static void unexpected_thermal_interrupt(void)
|
||||
{
|
||||
printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n",
|
||||
smp_processor_id());
|
||||
pr_err("CPU%d: Unexpected LVT thermal interrupt!\n",
|
||||
smp_processor_id());
|
||||
}
|
||||
|
||||
static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
|
||||
@@ -499,7 +498,7 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
|
||||
|
||||
if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
|
||||
if (system_state == SYSTEM_BOOTING)
|
||||
printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", cpu);
|
||||
pr_debug("CPU%d: Thermal monitoring handled by SMI\n", cpu);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -557,8 +556,8 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
|
||||
l = apic_read(APIC_LVTTHMR);
|
||||
apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
|
||||
|
||||
printk_once(KERN_INFO "CPU0: Thermal monitoring enabled (%s)\n",
|
||||
tm2 ? "TM2" : "TM1");
|
||||
pr_info_once("CPU0: Thermal monitoring enabled (%s)\n",
|
||||
tm2 ? "TM2" : "TM1");
|
||||
|
||||
/* enable thermal throttle processing */
|
||||
atomic_set(&therm_throt_en, 1);
|
||||
|
@@ -12,8 +12,8 @@
|
||||
|
||||
static void default_threshold_interrupt(void)
|
||||
{
|
||||
printk(KERN_ERR "Unexpected threshold interrupt at vector %x\n",
|
||||
THRESHOLD_APIC_VECTOR);
|
||||
pr_err("Unexpected threshold interrupt at vector %x\n",
|
||||
THRESHOLD_APIC_VECTOR);
|
||||
}
|
||||
|
||||
void (*mce_threshold_vector)(void) = default_threshold_interrupt;
|
||||
|
@@ -17,7 +17,7 @@ static void winchip_machine_check(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
ist_enter(regs);
|
||||
|
||||
printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
|
||||
pr_emerg("CPU0: Machine Check Exception.\n");
|
||||
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
|
||||
|
||||
ist_exit(regs);
|
||||
@@ -39,6 +39,5 @@ void winchip_mcheck_init(struct cpuinfo_x86 *c)
|
||||
|
||||
cr4_set_bits(X86_CR4_MCE);
|
||||
|
||||
printk(KERN_INFO
|
||||
"Winchip machine check reporting enabled on CPU#0.\n");
|
||||
pr_info("Winchip machine check reporting enabled on CPU#0.\n");
|
||||
}
|
||||
|
@@ -953,7 +953,7 @@ struct microcode_ops * __init init_amd_microcode(void)
|
||||
struct cpuinfo_x86 *c = &boot_cpu_data;
|
||||
|
||||
if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
|
||||
pr_warning("AMD CPU family 0x%x not supported\n", c->x86);
|
||||
pr_warn("AMD CPU family 0x%x not supported\n", c->x86);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@@ -161,8 +161,8 @@ static void __init ms_hyperv_init_platform(void)
|
||||
ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES);
|
||||
ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO);
|
||||
|
||||
printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n",
|
||||
ms_hyperv.features, ms_hyperv.hints);
|
||||
pr_info("HyperV: features 0x%x, hints 0x%x\n",
|
||||
ms_hyperv.features, ms_hyperv.hints);
|
||||
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
if (ms_hyperv.features & HV_X64_MSR_APIC_FREQUENCY_AVAILABLE) {
|
||||
@@ -174,8 +174,8 @@ static void __init ms_hyperv_init_platform(void)
|
||||
rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency);
|
||||
hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ);
|
||||
lapic_timer_frequency = hv_lapic_frequency;
|
||||
printk(KERN_INFO "HyperV: LAPIC Timer Frequency: %#x\n",
|
||||
lapic_timer_frequency);
|
||||
pr_info("HyperV: LAPIC Timer Frequency: %#x\n",
|
||||
lapic_timer_frequency);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@@ -103,7 +103,7 @@ centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int t
|
||||
*/
|
||||
if (type != MTRR_TYPE_WRCOMB &&
|
||||
(centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE)) {
|
||||
pr_warning("mtrr: only write-combining%s supported\n",
|
||||
pr_warn("mtrr: only write-combining%s supported\n",
|
||||
centaur_mcr_type ? " and uncacheable are" : " is");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@@ -57,9 +57,9 @@ static int __initdata nr_range;
|
||||
static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
|
||||
|
||||
static int __initdata debug_print;
|
||||
#define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0)
|
||||
#define Dprintk(x...) do { if (debug_print) pr_debug(x); } while (0)
|
||||
|
||||
#define BIOS_BUG_MSG KERN_WARNING \
|
||||
#define BIOS_BUG_MSG \
|
||||
"WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n"
|
||||
|
||||
static int __init
|
||||
@@ -81,9 +81,9 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range,
|
||||
base, base + size);
|
||||
}
|
||||
if (debug_print) {
|
||||
printk(KERN_DEBUG "After WB checking\n");
|
||||
pr_debug("After WB checking\n");
|
||||
for (i = 0; i < nr_range; i++)
|
||||
printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
|
||||
pr_debug("MTRR MAP PFN: %016llx - %016llx\n",
|
||||
range[i].start, range[i].end);
|
||||
}
|
||||
|
||||
@@ -101,7 +101,7 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range,
|
||||
(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED) &&
|
||||
(mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) {
|
||||
/* Var MTRR contains UC entry below 1M? Skip it: */
|
||||
printk(BIOS_BUG_MSG, i);
|
||||
pr_warn(BIOS_BUG_MSG, i);
|
||||
if (base + size <= (1<<(20-PAGE_SHIFT)))
|
||||
continue;
|
||||
size -= (1<<(20-PAGE_SHIFT)) - base;
|
||||
@@ -114,11 +114,11 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range,
|
||||
extra_remove_base + extra_remove_size);
|
||||
|
||||
if (debug_print) {
|
||||
printk(KERN_DEBUG "After UC checking\n");
|
||||
pr_debug("After UC checking\n");
|
||||
for (i = 0; i < RANGE_NUM; i++) {
|
||||
if (!range[i].end)
|
||||
continue;
|
||||
printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
|
||||
pr_debug("MTRR MAP PFN: %016llx - %016llx\n",
|
||||
range[i].start, range[i].end);
|
||||
}
|
||||
}
|
||||
@@ -126,9 +126,9 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range,
|
||||
/* sort the ranges */
|
||||
nr_range = clean_sort_range(range, RANGE_NUM);
|
||||
if (debug_print) {
|
||||
printk(KERN_DEBUG "After sorting\n");
|
||||
pr_debug("After sorting\n");
|
||||
for (i = 0; i < nr_range; i++)
|
||||
printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
|
||||
pr_debug("MTRR MAP PFN: %016llx - %016llx\n",
|
||||
range[i].start, range[i].end);
|
||||
}
|
||||
|
||||
@@ -544,7 +544,7 @@ static void __init print_out_mtrr_range_state(void)
|
||||
start_base = to_size_factor(start_base, &start_factor),
|
||||
type = range_state[i].type;
|
||||
|
||||
printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
|
||||
pr_debug("reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
|
||||
i, start_base, start_factor,
|
||||
size_base, size_factor,
|
||||
(type == MTRR_TYPE_UNCACHABLE) ? "UC" :
|
||||
@@ -713,7 +713,7 @@ int __init mtrr_cleanup(unsigned address_bits)
|
||||
return 0;
|
||||
|
||||
/* Print original var MTRRs at first, for debugging: */
|
||||
printk(KERN_DEBUG "original variable MTRRs\n");
|
||||
pr_debug("original variable MTRRs\n");
|
||||
print_out_mtrr_range_state();
|
||||
|
||||
memset(range, 0, sizeof(range));
|
||||
@@ -733,7 +733,7 @@ int __init mtrr_cleanup(unsigned address_bits)
|
||||
x_remove_base, x_remove_size);
|
||||
|
||||
range_sums = sum_ranges(range, nr_range);
|
||||
printk(KERN_INFO "total RAM covered: %ldM\n",
|
||||
pr_info("total RAM covered: %ldM\n",
|
||||
range_sums >> (20 - PAGE_SHIFT));
|
||||
|
||||
if (mtrr_chunk_size && mtrr_gran_size) {
|
||||
@@ -745,12 +745,11 @@ int __init mtrr_cleanup(unsigned address_bits)
|
||||
|
||||
if (!result[i].bad) {
|
||||
set_var_mtrr_all(address_bits);
|
||||
printk(KERN_DEBUG "New variable MTRRs\n");
|
||||
pr_debug("New variable MTRRs\n");
|
||||
print_out_mtrr_range_state();
|
||||
return 1;
|
||||
}
|
||||
printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
|
||||
"will find optimal one\n");
|
||||
pr_info("invalid mtrr_gran_size or mtrr_chunk_size, will find optimal one\n");
|
||||
}
|
||||
|
||||
i = 0;
|
||||
@@ -768,7 +767,7 @@ int __init mtrr_cleanup(unsigned address_bits)
|
||||
x_remove_base, x_remove_size, i);
|
||||
if (debug_print) {
|
||||
mtrr_print_out_one_result(i);
|
||||
printk(KERN_INFO "\n");
|
||||
pr_info("\n");
|
||||
}
|
||||
|
||||
i++;
|
||||
@@ -779,7 +778,7 @@ int __init mtrr_cleanup(unsigned address_bits)
|
||||
index_good = mtrr_search_optimal_index();
|
||||
|
||||
if (index_good != -1) {
|
||||
printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
|
||||
pr_info("Found optimal setting for mtrr clean up\n");
|
||||
i = index_good;
|
||||
mtrr_print_out_one_result(i);
|
||||
|
||||
@@ -790,7 +789,7 @@ int __init mtrr_cleanup(unsigned address_bits)
|
||||
gran_size <<= 10;
|
||||
x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
|
||||
set_var_mtrr_all(address_bits);
|
||||
printk(KERN_DEBUG "New variable MTRRs\n");
|
||||
pr_debug("New variable MTRRs\n");
|
||||
print_out_mtrr_range_state();
|
||||
return 1;
|
||||
} else {
|
||||
@@ -799,8 +798,8 @@ int __init mtrr_cleanup(unsigned address_bits)
|
||||
mtrr_print_out_one_result(i);
|
||||
}
|
||||
|
||||
printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n");
|
||||
printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n");
|
||||
pr_info("mtrr_cleanup: can not find optimal value\n");
|
||||
pr_info("please specify mtrr_gran_size/mtrr_chunk_size\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -918,7 +917,7 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
|
||||
|
||||
/* kvm/qemu doesn't have mtrr set right, don't trim them all: */
|
||||
if (!highest_pfn) {
|
||||
printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n");
|
||||
pr_info("CPU MTRRs all blank - virtualized system.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -973,7 +972,8 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
|
||||
end_pfn);
|
||||
|
||||
if (total_trim_size) {
|
||||
pr_warning("WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing %lluMB of RAM.\n", total_trim_size >> 20);
|
||||
pr_warn("WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing %lluMB of RAM.\n",
|
||||
total_trim_size >> 20);
|
||||
|
||||
if (!changed_by_mtrr_cleanup)
|
||||
WARN_ON(1);
|
||||
|
@@ -55,7 +55,7 @@ static inline void k8_check_syscfg_dram_mod_en(void)
|
||||
|
||||
rdmsr(MSR_K8_SYSCFG, lo, hi);
|
||||
if (lo & K8_MTRRFIXRANGE_DRAM_MODIFY) {
|
||||
printk(KERN_ERR FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]"
|
||||
pr_err(FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]"
|
||||
" not cleared by BIOS, clearing this bit\n",
|
||||
smp_processor_id());
|
||||
lo &= ~K8_MTRRFIXRANGE_DRAM_MODIFY;
|
||||
@@ -501,14 +501,14 @@ void __init mtrr_state_warn(void)
|
||||
if (!mask)
|
||||
return;
|
||||
if (mask & MTRR_CHANGE_MASK_FIXED)
|
||||
pr_warning("mtrr: your CPUs had inconsistent fixed MTRR settings\n");
|
||||
pr_warn("mtrr: your CPUs had inconsistent fixed MTRR settings\n");
|
||||
if (mask & MTRR_CHANGE_MASK_VARIABLE)
|
||||
pr_warning("mtrr: your CPUs had inconsistent variable MTRR settings\n");
|
||||
pr_warn("mtrr: your CPUs had inconsistent variable MTRR settings\n");
|
||||
if (mask & MTRR_CHANGE_MASK_DEFTYPE)
|
||||
pr_warning("mtrr: your CPUs had inconsistent MTRRdefType settings\n");
|
||||
pr_warn("mtrr: your CPUs had inconsistent MTRRdefType settings\n");
|
||||
|
||||
printk(KERN_INFO "mtrr: probably your BIOS does not setup all CPUs.\n");
|
||||
printk(KERN_INFO "mtrr: corrected configuration.\n");
|
||||
pr_info("mtrr: probably your BIOS does not setup all CPUs.\n");
|
||||
pr_info("mtrr: corrected configuration.\n");
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -519,8 +519,7 @@ void __init mtrr_state_warn(void)
|
||||
void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b)
|
||||
{
|
||||
if (wrmsr_safe(msr, a, b) < 0) {
|
||||
printk(KERN_ERR
|
||||
"MTRR: CPU %u: Writing MSR %x to %x:%x failed\n",
|
||||
pr_err("MTRR: CPU %u: Writing MSR %x to %x:%x failed\n",
|
||||
smp_processor_id(), msr, a, b);
|
||||
}
|
||||
}
|
||||
@@ -607,7 +606,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
|
||||
tmp |= ~((1ULL<<(hi - 1)) - 1);
|
||||
|
||||
if (tmp != mask) {
|
||||
printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n");
|
||||
pr_warn("mtrr: your BIOS has configured an incorrect mask, fixing it.\n");
|
||||
add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
|
||||
mask = tmp;
|
||||
}
|
||||
@@ -858,13 +857,13 @@ int generic_validate_add_page(unsigned long base, unsigned long size,
|
||||
boot_cpu_data.x86_model == 1 &&
|
||||
boot_cpu_data.x86_mask <= 7) {
|
||||
if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) {
|
||||
pr_warning("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);
|
||||
pr_warn("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);
|
||||
return -EINVAL;
|
||||
}
|
||||
if (!(base + size < 0x70000 || base > 0x7003F) &&
|
||||
(type == MTRR_TYPE_WRCOMB
|
||||
|| type == MTRR_TYPE_WRBACK)) {
|
||||
pr_warning("mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n");
|
||||
pr_warn("mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
@@ -878,7 +877,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size,
|
||||
lbase = lbase >> 1, last = last >> 1)
|
||||
;
|
||||
if (lbase != last) {
|
||||
pr_warning("mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", base, size);
|
||||
pr_warn("mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", base, size);
|
||||
return -EINVAL;
|
||||
}
|
||||
return 0;
|
||||
|
@@ -300,24 +300,24 @@ int mtrr_add_page(unsigned long base, unsigned long size,
|
||||
return error;
|
||||
|
||||
if (type >= MTRR_NUM_TYPES) {
|
||||
pr_warning("mtrr: type: %u invalid\n", type);
|
||||
pr_warn("mtrr: type: %u invalid\n", type);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* If the type is WC, check that this processor supports it */
|
||||
if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
|
||||
pr_warning("mtrr: your processor doesn't support write-combining\n");
|
||||
pr_warn("mtrr: your processor doesn't support write-combining\n");
|
||||
return -ENOSYS;
|
||||
}
|
||||
|
||||
if (!size) {
|
||||
pr_warning("mtrr: zero sized request\n");
|
||||
pr_warn("mtrr: zero sized request\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if ((base | (base + size - 1)) >>
|
||||
(boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) {
|
||||
pr_warning("mtrr: base or size exceeds the MTRR width\n");
|
||||
pr_warn("mtrr: base or size exceeds the MTRR width\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@@ -348,7 +348,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
|
||||
} else if (types_compatible(type, ltype))
|
||||
continue;
|
||||
}
|
||||
pr_warning("mtrr: 0x%lx000,0x%lx000 overlaps existing"
|
||||
pr_warn("mtrr: 0x%lx000,0x%lx000 overlaps existing"
|
||||
" 0x%lx000,0x%lx000\n", base, size, lbase,
|
||||
lsize);
|
||||
goto out;
|
||||
@@ -357,7 +357,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
|
||||
if (ltype != type) {
|
||||
if (types_compatible(type, ltype))
|
||||
continue;
|
||||
pr_warning("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
|
||||
pr_warn("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
|
||||
base, size, mtrr_attrib_to_str(ltype),
|
||||
mtrr_attrib_to_str(type));
|
||||
goto out;
|
||||
@@ -395,7 +395,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
|
||||
static int mtrr_check(unsigned long base, unsigned long size)
|
||||
{
|
||||
if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
|
||||
pr_warning("mtrr: size and base must be multiples of 4 kiB\n");
|
||||
pr_warn("mtrr: size and base must be multiples of 4 kiB\n");
|
||||
pr_debug("mtrr: size: 0x%lx base: 0x%lx\n", size, base);
|
||||
dump_stack();
|
||||
return -1;
|
||||
@@ -493,16 +493,16 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
|
||||
}
|
||||
}
|
||||
if (reg >= max) {
|
||||
pr_warning("mtrr: register: %d too big\n", reg);
|
||||
pr_warn("mtrr: register: %d too big\n", reg);
|
||||
goto out;
|
||||
}
|
||||
mtrr_if->get(reg, &lbase, &lsize, <ype);
|
||||
if (lsize < 1) {
|
||||
pr_warning("mtrr: MTRR %d not used\n", reg);
|
||||
pr_warn("mtrr: MTRR %d not used\n", reg);
|
||||
goto out;
|
||||
}
|
||||
if (mtrr_usage_table[reg] < 1) {
|
||||
pr_warning("mtrr: reg: %d has count=0\n", reg);
|
||||
pr_warn("mtrr: reg: %d has count=0\n", reg);
|
||||
goto out;
|
||||
}
|
||||
if (--mtrr_usage_table[reg] < 1)
|
||||
|
File diff suppressed because it is too large
@@ -1,955 +0,0 @@
|
||||
/*
|
||||
* Performance events x86 architecture header
|
||||
*
|
||||
* Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
|
||||
* Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
|
||||
* Copyright (C) 2009 Jaswinder Singh Rajput
|
||||
* Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
|
||||
* Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
|
||||
* Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
|
||||
* Copyright (C) 2009 Google, Inc., Stephane Eranian
|
||||
*
|
||||
* For licencing details see kernel-base/COPYING
|
||||
*/
|
||||
|
||||
#include <linux/perf_event.h>
|
||||
|
||||
/* To enable MSR tracing please use the generic trace points. */
|
||||
|
||||
/*
|
||||
* | NHM/WSM | SNB |
|
||||
* register -------------------------------
|
||||
* | HT | no HT | HT | no HT |
|
||||
*-----------------------------------------
|
||||
* offcore | core | core | cpu | core |
|
||||
* lbr_sel | core | core | cpu | core |
|
||||
* ld_lat | cpu | core | cpu | core |
|
||||
*-----------------------------------------
|
||||
*
|
||||
* Given that there is a small number of shared regs,
|
||||
* we can pre-allocate their slot in the per-cpu
|
||||
* per-core reg tables.
|
||||
*/
|
||||
enum extra_reg_type {
|
||||
EXTRA_REG_NONE = -1, /* not used */
|
||||
|
||||
EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */
|
||||
EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */
|
||||
EXTRA_REG_LBR = 2, /* lbr_select */
|
||||
EXTRA_REG_LDLAT = 3, /* ld_lat_threshold */
|
||||
EXTRA_REG_FE = 4, /* fe_* */
|
||||
|
||||
EXTRA_REG_MAX /* number of entries needed */
|
||||
};
|
||||
|
||||
struct event_constraint {
|
||||
union {
|
||||
unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
|
||||
u64 idxmsk64;
|
||||
};
|
||||
u64 code;
|
||||
u64 cmask;
|
||||
int weight;
|
||||
int overlap;
|
||||
int flags;
|
||||
};
|
||||
/*
|
||||
* struct hw_perf_event.flags flags
|
||||
*/
|
||||
#define PERF_X86_EVENT_PEBS_LDLAT 0x0001 /* ld+ldlat data address sampling */
|
||||
#define PERF_X86_EVENT_PEBS_ST 0x0002 /* st data address sampling */
|
||||
#define PERF_X86_EVENT_PEBS_ST_HSW 0x0004 /* haswell style datala, store */
|
||||
#define PERF_X86_EVENT_COMMITTED 0x0008 /* event passed commit_txn */
|
||||
#define PERF_X86_EVENT_PEBS_LD_HSW 0x0010 /* haswell style datala, load */
|
||||
#define PERF_X86_EVENT_PEBS_NA_HSW 0x0020 /* haswell style datala, unknown */
|
||||
#define PERF_X86_EVENT_EXCL 0x0040 /* HT exclusivity on counter */
|
||||
#define PERF_X86_EVENT_DYNAMIC 0x0080 /* dynamic alloc'd constraint */
|
||||
#define PERF_X86_EVENT_RDPMC_ALLOWED 0x0100 /* grant rdpmc permission */
|
||||
#define PERF_X86_EVENT_EXCL_ACCT 0x0200 /* accounted EXCL event */
|
||||
#define PERF_X86_EVENT_AUTO_RELOAD 0x0400 /* use PEBS auto-reload */
|
||||
#define PERF_X86_EVENT_FREERUNNING 0x0800 /* use freerunning PEBS */
|
||||
|
||||
|
||||
struct amd_nb {
|
||||
int nb_id; /* NorthBridge id */
|
||||
int refcnt; /* reference count */
|
||||
struct perf_event *owners[X86_PMC_IDX_MAX];
|
||||
struct event_constraint event_constraints[X86_PMC_IDX_MAX];
|
||||
};
|
||||
|
||||
/* The maximal number of PEBS events: */
|
||||
#define MAX_PEBS_EVENTS 8
|
||||
|
||||
/*
|
||||
* Flags PEBS can handle without an PMI.
|
||||
*
|
||||
* TID can only be handled by flushing at context switch.
|
||||
*
|
||||
*/
|
||||
#define PEBS_FREERUNNING_FLAGS \
|
||||
(PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
|
||||
PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
|
||||
PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
|
||||
PERF_SAMPLE_TRANSACTION)
|
||||
|
||||
/*
|
||||
* A debug store configuration.
|
||||
*
|
||||
* We only support architectures that use 64bit fields.
|
||||
*/
|
||||
struct debug_store {
|
||||
u64 bts_buffer_base;
|
||||
u64 bts_index;
|
||||
u64 bts_absolute_maximum;
|
||||
u64 bts_interrupt_threshold;
|
||||
u64 pebs_buffer_base;
|
||||
u64 pebs_index;
|
||||
u64 pebs_absolute_maximum;
|
||||
u64 pebs_interrupt_threshold;
|
||||
u64 pebs_event_reset[MAX_PEBS_EVENTS];
|
||||
};
|
||||
|
||||
/*
|
||||
* Per register state.
|
||||
*/
|
||||
struct er_account {
|
||||
raw_spinlock_t lock; /* per-core: protect structure */
|
||||
u64 config; /* extra MSR config */
|
||||
u64 reg; /* extra MSR number */
|
||||
atomic_t ref; /* reference count */
|
||||
};
|
||||
|
||||
/*
|
||||
* Per core/cpu state
|
||||
*
|
||||
* Used to coordinate shared registers between HT threads or
|
||||
* among events on a single PMU.
|
||||
*/
|
||||
struct intel_shared_regs {
|
||||
struct er_account regs[EXTRA_REG_MAX];
|
||||
int refcnt; /* per-core: #HT threads */
|
||||
unsigned core_id; /* per-core: core id */
|
||||
};
|
||||
|
||||
enum intel_excl_state_type {
|
||||
INTEL_EXCL_UNUSED = 0, /* counter is unused */
|
||||
INTEL_EXCL_SHARED = 1, /* counter can be used by both threads */
|
||||
INTEL_EXCL_EXCLUSIVE = 2, /* counter can be used by one thread only */
|
||||
};
|
||||
|
||||
struct intel_excl_states {
|
||||
enum intel_excl_state_type state[X86_PMC_IDX_MAX];
|
||||
bool sched_started; /* true if scheduling has started */
|
||||
};
|
||||
|
||||
struct intel_excl_cntrs {
|
||||
raw_spinlock_t lock;
|
||||
|
||||
struct intel_excl_states states[2];
|
||||
|
||||
union {
|
||||
u16 has_exclusive[2];
|
||||
u32 exclusive_present;
|
||||
};
|
||||
|
||||
int refcnt; /* per-core: #HT threads */
|
||||
unsigned core_id; /* per-core: core id */
|
||||
};
|
||||
|
||||
#define MAX_LBR_ENTRIES 32
|
||||
|
||||
enum {
|
||||
X86_PERF_KFREE_SHARED = 0,
|
||||
X86_PERF_KFREE_EXCL = 1,
|
||||
X86_PERF_KFREE_MAX
|
||||
};
|
||||
|
||||
struct cpu_hw_events {
|
||||
/*
|
||||
* Generic x86 PMC bits
|
||||
*/
|
||||
struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */
|
||||
unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
|
||||
unsigned long running[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
|
||||
int enabled;
|
||||
|
||||
int n_events; /* the # of events in the below arrays */
|
||||
int n_added; /* the # last events in the below arrays;
|
||||
they've never been enabled yet */
|
||||
int n_txn; /* the # last events in the below arrays;
|
||||
added in the current transaction */
|
||||
int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
|
||||
u64 tags[X86_PMC_IDX_MAX];
|
||||
|
||||
struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
|
||||
struct event_constraint *event_constraint[X86_PMC_IDX_MAX];
|
||||
|
||||
int n_excl; /* the number of exclusive events */
|
||||
|
||||
unsigned int txn_flags;
|
||||
int is_fake;
|
||||
|
||||
/*
|
||||
* Intel DebugStore bits
|
||||
*/
|
||||
struct debug_store *ds;
|
||||
u64 pebs_enabled;
|
||||
|
||||
/*
|
||||
* Intel LBR bits
|
||||
*/
|
||||
int lbr_users;
|
||||
void *lbr_context;
|
||||
struct perf_branch_stack lbr_stack;
|
||||
struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
|
||||
struct er_account *lbr_sel;
|
||||
u64 br_sel;
|
||||
|
||||
/*
|
||||
* Intel host/guest exclude bits
|
||||
*/
|
||||
u64 intel_ctrl_guest_mask;
|
||||
u64 intel_ctrl_host_mask;
|
||||
struct perf_guest_switch_msr guest_switch_msrs[X86_PMC_IDX_MAX];
|
||||
|
||||
/*
|
||||
* Intel checkpoint mask
|
||||
*/
|
||||
u64 intel_cp_status;
|
||||
|
||||
/*
|
||||
* manage shared (per-core, per-cpu) registers
|
||||
* used on Intel NHM/WSM/SNB
|
||||
*/
|
||||
struct intel_shared_regs *shared_regs;
|
||||
/*
|
||||
* manage exclusive counter access between hyperthread
|
||||
*/
|
||||
struct event_constraint *constraint_list; /* in enable order */
|
||||
struct intel_excl_cntrs *excl_cntrs;
|
||||
int excl_thread_id; /* 0 or 1 */
|
||||
|
||||
/*
|
||||
* AMD specific bits
|
||||
*/
|
||||
struct amd_nb *amd_nb;
|
||||
/* Inverted mask of bits to clear in the perf_ctr ctrl registers */
|
||||
u64 perf_ctr_virt_mask;
|
||||
|
||||
void *kfree_on_online[X86_PERF_KFREE_MAX];
|
||||
};
|
||||
|
||||
#define __EVENT_CONSTRAINT(c, n, m, w, o, f) {\
|
||||
{ .idxmsk64 = (n) }, \
|
||||
.code = (c), \
|
||||
.cmask = (m), \
|
||||
.weight = (w), \
|
||||
.overlap = (o), \
|
||||
.flags = f, \
|
||||
}
|
||||
|
||||
#define EVENT_CONSTRAINT(c, n, m) \
|
||||
__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0)
|
||||
|
||||
#define INTEL_EXCLEVT_CONSTRAINT(c, n) \
|
||||
__EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT, HWEIGHT(n),\
|
||||
0, PERF_X86_EVENT_EXCL)
|
||||
|
||||
/*
|
||||
* The overlap flag marks event constraints with overlapping counter
|
||||
* masks. This is the case if the counter mask of such an event is not
|
||||
* a subset of any other counter mask of a constraint with an equal or
|
||||
* higher weight, e.g.:
|
||||
*
|
||||
* c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
|
||||
* c_another1 = EVENT_CONSTRAINT(0, 0x07, 0);
|
||||
* c_another2 = EVENT_CONSTRAINT(0, 0x38, 0);
|
||||
*
|
||||
* The event scheduler may not select the correct counter in the first
|
||||
* cycle because it needs to know which subsequent events will be
|
||||
* scheduled. It may fail to schedule the events then. So we set the
|
||||
* overlap flag for such constraints to give the scheduler a hint which
|
||||
* events to select for counter rescheduling.
|
||||
*
|
||||
* Care must be taken as the rescheduling algorithm is O(n!) which
* will increase scheduling cycles for an over-committed system
* dramatically. The number of such EVENT_CONSTRAINT_OVERLAP() macros
* and their counter masks must be kept to a minimum.
*/
|
||||
#define EVENT_CONSTRAINT_OVERLAP(c, n, m) \
|
||||
__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1, 0)
|
||||
|
||||
/*
|
||||
* Constraint on the Event code.
|
||||
*/
|
||||
#define INTEL_EVENT_CONSTRAINT(c, n) \
|
||||
EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
|
||||
|
||||
/*
|
||||
* Constraint on the Event code + UMask + fixed-mask
|
||||
*
|
||||
* filter mask to validate fixed counter events.
|
||||
* the following filters disqualify for fixed counters:
|
||||
* - inv
|
||||
* - edge
|
||||
* - cnt-mask
|
||||
* - in_tx
|
||||
* - in_tx_checkpointed
|
||||
* The other filters are supported by fixed counters.
|
||||
* The any-thread option is supported starting with v3.
|
||||
*/
|
||||
#define FIXED_EVENT_FLAGS (X86_RAW_EVENT_MASK|HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)
|
||||
#define FIXED_EVENT_CONSTRAINT(c, n) \
|
||||
EVENT_CONSTRAINT(c, (1ULL << (32+n)), FIXED_EVENT_FLAGS)
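/*
 * Illustrative sketch, not part of the original file: how a model table
 * would pin the architectural instructions-retired event (0x00c0) to
 * fixed counter 0. FIXED_EVENT_CONSTRAINT(c, 0) sets bit 32+0 in the
 * index mask, i.e. the first counter above the general purpose range.
 * 'example_fixed0' is a hypothetical name used only here.
 */
static struct event_constraint example_fixed0 __maybe_unused =
	FIXED_EVENT_CONSTRAINT(0x00c0, 0);	/* idxmsk64 has only bit 32 set */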
/*
|
||||
* Constraint on the Event code + UMask
|
||||
*/
|
||||
#define INTEL_UEVENT_CONSTRAINT(c, n) \
|
||||
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
|
||||
|
||||
/* Constraint on specific umask bit only + event */
|
||||
#define INTEL_UBIT_EVENT_CONSTRAINT(c, n) \
|
||||
EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|(c))
|
||||
|
||||
/* Like UEVENT_CONSTRAINT, but match flags too */
|
||||
#define INTEL_FLAGS_UEVENT_CONSTRAINT(c, n) \
|
||||
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
|
||||
|
||||
#define INTEL_EXCLUEVT_CONSTRAINT(c, n) \
|
||||
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
|
||||
HWEIGHT(n), 0, PERF_X86_EVENT_EXCL)
|
||||
|
||||
#define INTEL_PLD_CONSTRAINT(c, n) \
|
||||
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
|
||||
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
|
||||
|
||||
#define INTEL_PST_CONSTRAINT(c, n) \
|
||||
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
|
||||
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
|
||||
|
||||
/* Event constraint, but match on all event flags too. */
|
||||
#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
|
||||
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
|
||||
|
||||
/* Check only flags, but allow all event/umask */
|
||||
#define INTEL_ALL_EVENT_CONSTRAINT(code, n) \
|
||||
EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS)
|
||||
|
||||
/* Check flags and event code, and set the HSW store flag */
|
||||
#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_ST(code, n) \
|
||||
__EVENT_CONSTRAINT(code, n, \
|
||||
ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
|
||||
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
|
||||
|
||||
/* Check flags and event code, and set the HSW load flag */
|
||||
#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(code, n) \
|
||||
__EVENT_CONSTRAINT(code, n, \
|
||||
ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
|
||||
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)
|
||||
|
||||
#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(code, n) \
|
||||
__EVENT_CONSTRAINT(code, n, \
|
||||
ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
|
||||
HWEIGHT(n), 0, \
|
||||
PERF_X86_EVENT_PEBS_LD_HSW|PERF_X86_EVENT_EXCL)
|
||||
|
||||
/* Check flags and event code/umask, and set the HSW store flag */
|
||||
#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(code, n) \
|
||||
__EVENT_CONSTRAINT(code, n, \
|
||||
INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
|
||||
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
|
||||
|
||||
#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(code, n) \
|
||||
__EVENT_CONSTRAINT(code, n, \
|
||||
INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
|
||||
HWEIGHT(n), 0, \
|
||||
PERF_X86_EVENT_PEBS_ST_HSW|PERF_X86_EVENT_EXCL)
|
||||
|
||||
/* Check flags and event code/umask, and set the HSW load flag */
|
||||
#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(code, n) \
|
||||
__EVENT_CONSTRAINT(code, n, \
|
||||
INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
|
||||
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)
|
||||
|
||||
#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(code, n) \
|
||||
__EVENT_CONSTRAINT(code, n, \
|
||||
INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
|
||||
HWEIGHT(n), 0, \
|
||||
PERF_X86_EVENT_PEBS_LD_HSW|PERF_X86_EVENT_EXCL)
|
||||
|
||||
/* Check flags and event code/umask, and set the HSW N/A flag */
|
||||
#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(code, n) \
|
||||
__EVENT_CONSTRAINT(code, n, \
|
||||
INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
|
||||
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_NA_HSW)
|
||||
|
||||
|
||||
/*
|
||||
* We define the end marker as having a weight of -1
|
||||
* to enable blacklisting of events using a counter bitmask
|
||||
* of zero and thus a weight of zero.
|
||||
* The end marker has a weight that cannot possibly be
|
||||
* obtained from counting the bits in the bitmask.
|
||||
*/
|
||||
#define EVENT_CONSTRAINT_END { .weight = -1 }
|
||||
|
||||
/*
|
||||
* Check for end marker with weight == -1
|
||||
*/
|
||||
#define for_each_event_constraint(e, c) \
|
||||
for ((e) = (c); (e)->weight != -1; (e)++)
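/*
 * Illustrative sketch, not part of the original file: walking a
 * constraint table that is terminated by EVENT_CONSTRAINT_END.
 * example_count_constraints() is a hypothetical helper.
 */
static inline int example_count_constraints(struct event_constraint *table)
{
	struct event_constraint *c;
	int n = 0;

	for_each_event_constraint(c, table)
		n++;

	return n;
}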
/*
|
||||
* Extra registers for specific events.
|
||||
*
|
||||
* Some events need large masks and require external MSRs.
|
||||
* Those extra MSRs end up being shared for all events on
|
||||
* a PMU and sometimes between PMU of sibling HT threads.
|
||||
* In either case, the kernel needs to handle conflicting
|
||||
* accesses to those extra, shared, regs. The data structure
|
||||
* to manage those registers is stored in cpu_hw_event.
|
||||
*/
|
||||
struct extra_reg {
|
||||
unsigned int event;
|
||||
unsigned int msr;
|
||||
u64 config_mask;
|
||||
u64 valid_mask;
|
||||
int idx; /* per_xxx->regs[] reg index */
|
||||
bool extra_msr_access;
|
||||
};
|
||||
|
||||
#define EVENT_EXTRA_REG(e, ms, m, vm, i) { \
|
||||
.event = (e), \
|
||||
.msr = (ms), \
|
||||
.config_mask = (m), \
|
||||
.valid_mask = (vm), \
|
||||
.idx = EXTRA_REG_##i, \
|
||||
.extra_msr_access = true, \
|
||||
}
|
||||
|
||||
#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \
|
||||
EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)
|
||||
|
||||
#define INTEL_UEVENT_EXTRA_REG(event, msr, vm, idx) \
|
||||
EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT | \
|
||||
ARCH_PERFMON_EVENTSEL_UMASK, vm, idx)
|
||||
|
||||
#define INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(c) \
|
||||
INTEL_UEVENT_EXTRA_REG(c, \
|
||||
MSR_PEBS_LD_LAT_THRESHOLD, \
|
||||
0xffff, \
|
||||
LDLAT)
|
||||
|
||||
#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
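/*
 * Illustrative sketch, not part of the original file: the shape of an
 * extra_regs table as a model file would define it. The event and mask
 * values below are only an example of an OFFCORE_RESPONSE style event
 * whose request mask lives in MSR_OFFCORE_RSP_0; the valid mask is
 * model specific. 'example_extra_regs' is a hypothetical name.
 */
static struct extra_reg example_extra_regs[] __maybe_unused = {
	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffffffffULL, RSP_0),
	EVENT_EXTRA_END
};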
union perf_capabilities {
|
||||
struct {
|
||||
u64 lbr_format:6;
|
||||
u64 pebs_trap:1;
|
||||
u64 pebs_arch_reg:1;
|
||||
u64 pebs_format:4;
|
||||
u64 smm_freeze:1;
|
||||
/*
|
||||
* PMU supports separate counter range for writing
|
||||
* values > 32bit.
|
||||
*/
|
||||
u64 full_width_write:1;
|
||||
};
|
||||
u64 capabilities;
|
||||
};
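/*
 * Illustrative sketch, not part of the original file: the init code
 * fills this union straight from IA32_PERF_CAPABILITIES when CPUID
 * reports that the MSR exists. example_read_perf_capabilities() is a
 * hypothetical helper.
 */
static inline void example_read_perf_capabilities(union perf_capabilities *cap)
{
	rdmsrl(MSR_IA32_PERF_CAPABILITIES, cap->capabilities);
}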
struct x86_pmu_quirk {
|
||||
struct x86_pmu_quirk *next;
|
||||
void (*func)(void);
|
||||
};
|
||||
|
||||
union x86_pmu_config {
|
||||
struct {
|
||||
u64 event:8,
|
||||
umask:8,
|
||||
usr:1,
|
||||
os:1,
|
||||
edge:1,
|
||||
pc:1,
|
||||
interrupt:1,
|
||||
__reserved1:1,
|
||||
en:1,
|
||||
inv:1,
|
||||
cmask:8,
|
||||
event2:4,
|
||||
__reserved2:4,
|
||||
go:1,
|
||||
ho:1;
|
||||
} bits;
|
||||
u64 value;
|
||||
};
|
||||
|
||||
#define X86_CONFIG(args...) ((union x86_pmu_config){.bits = {args}}).value
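/*
 * Illustrative sketch, not part of the original file: X86_CONFIG()
 * assembles a raw PERFEVTSEL-style config from designated initializers,
 * here event 0xc0 (instructions retired) counted in user and kernel
 * mode. example_inst_retired_config() is a hypothetical helper.
 */
static inline u64 example_inst_retired_config(void)
{
	return X86_CONFIG(.event=0xc0, .umask=0x00, .usr=1, .os=1);
}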
enum {
|
||||
x86_lbr_exclusive_lbr,
|
||||
x86_lbr_exclusive_bts,
|
||||
x86_lbr_exclusive_pt,
|
||||
x86_lbr_exclusive_max,
|
||||
};
|
||||
|
||||
/*
|
||||
* struct x86_pmu - generic x86 pmu
|
||||
*/
|
||||
struct x86_pmu {
|
||||
/*
|
||||
* Generic x86 PMC bits
|
||||
*/
|
||||
const char *name;
|
||||
int version;
|
||||
int (*handle_irq)(struct pt_regs *);
|
||||
void (*disable_all)(void);
|
||||
void (*enable_all)(int added);
|
||||
void (*enable)(struct perf_event *);
|
||||
void (*disable)(struct perf_event *);
|
||||
int (*hw_config)(struct perf_event *event);
|
||||
int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
|
||||
unsigned eventsel;
|
||||
unsigned perfctr;
|
||||
int (*addr_offset)(int index, bool eventsel);
|
||||
int (*rdpmc_index)(int index);
|
||||
u64 (*event_map)(int);
|
||||
int max_events;
|
||||
int num_counters;
|
||||
int num_counters_fixed;
|
||||
int cntval_bits;
|
||||
u64 cntval_mask;
|
||||
union {
|
||||
unsigned long events_maskl;
|
||||
unsigned long events_mask[BITS_TO_LONGS(ARCH_PERFMON_EVENTS_COUNT)];
|
||||
};
|
||||
int events_mask_len;
|
||||
int apic;
|
||||
u64 max_period;
|
||||
struct event_constraint *
|
||||
(*get_event_constraints)(struct cpu_hw_events *cpuc,
|
||||
int idx,
|
||||
struct perf_event *event);
|
||||
|
||||
void (*put_event_constraints)(struct cpu_hw_events *cpuc,
|
||||
struct perf_event *event);
|
||||
|
||||
void (*start_scheduling)(struct cpu_hw_events *cpuc);
|
||||
|
||||
void (*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr);
|
||||
|
||||
void (*stop_scheduling)(struct cpu_hw_events *cpuc);
|
||||
|
||||
struct event_constraint *event_constraints;
|
||||
struct x86_pmu_quirk *quirks;
|
||||
int perfctr_second_write;
|
||||
bool late_ack;
|
||||
unsigned (*limit_period)(struct perf_event *event, unsigned l);
|
||||
|
||||
/*
|
||||
* sysfs attrs
|
||||
*/
|
||||
int attr_rdpmc_broken;
|
||||
int attr_rdpmc;
|
||||
struct attribute **format_attrs;
|
||||
struct attribute **event_attrs;
|
||||
|
||||
ssize_t (*events_sysfs_show)(char *page, u64 config);
|
||||
struct attribute **cpu_events;
|
||||
|
||||
/*
|
||||
* CPU Hotplug hooks
|
||||
*/
|
||||
int (*cpu_prepare)(int cpu);
|
||||
void (*cpu_starting)(int cpu);
|
||||
void (*cpu_dying)(int cpu);
|
||||
void (*cpu_dead)(int cpu);
|
||||
|
||||
void (*check_microcode)(void);
|
||||
void (*sched_task)(struct perf_event_context *ctx,
|
||||
bool sched_in);
|
||||
|
||||
/*
|
||||
* Intel Arch Perfmon v2+
|
||||
*/
|
||||
u64 intel_ctrl;
|
||||
union perf_capabilities intel_cap;
|
||||
|
||||
/*
|
||||
* Intel DebugStore bits
|
||||
*/
|
||||
unsigned int bts :1,
|
||||
bts_active :1,
|
||||
pebs :1,
|
||||
pebs_active :1,
|
||||
pebs_broken :1,
|
||||
pebs_prec_dist :1;
|
||||
int pebs_record_size;
|
||||
void (*drain_pebs)(struct pt_regs *regs);
|
||||
struct event_constraint *pebs_constraints;
|
||||
void (*pebs_aliases)(struct perf_event *event);
|
||||
int max_pebs_events;
|
||||
unsigned long free_running_flags;
|
||||
|
||||
/*
|
||||
* Intel LBR
|
||||
*/
|
||||
unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
|
||||
int lbr_nr; /* hardware stack size */
|
||||
u64 lbr_sel_mask; /* LBR_SELECT valid bits */
|
||||
const int *lbr_sel_map; /* lbr_select mappings */
|
||||
bool lbr_double_abort; /* duplicated lbr aborts */
|
||||
|
||||
/*
|
||||
* Intel PT/LBR/BTS are exclusive
|
||||
*/
|
||||
atomic_t lbr_exclusive[x86_lbr_exclusive_max];
|
||||
|
||||
/*
|
||||
* Extra registers for events
|
||||
*/
|
||||
struct extra_reg *extra_regs;
|
||||
unsigned int flags;
|
||||
|
||||
/*
|
||||
* Intel host/guest support (KVM)
|
||||
*/
|
||||
struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
|
||||
};
|
||||
|
||||
struct x86_perf_task_context {
|
||||
u64 lbr_from[MAX_LBR_ENTRIES];
|
||||
u64 lbr_to[MAX_LBR_ENTRIES];
|
||||
u64 lbr_info[MAX_LBR_ENTRIES];
|
||||
int tos;
|
||||
int lbr_callstack_users;
|
||||
int lbr_stack_state;
|
||||
};
|
||||
|
||||
#define x86_add_quirk(func_) \
|
||||
do { \
|
||||
static struct x86_pmu_quirk __quirk __initdata = { \
|
||||
.func = func_, \
|
||||
}; \
|
||||
__quirk.next = x86_pmu.quirks; \
|
||||
x86_pmu.quirks = &__quirk; \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* x86_pmu flags
|
||||
*/
|
||||
#define PMU_FL_NO_HT_SHARING 0x1 /* no hyper-threading resource sharing */
|
||||
#define PMU_FL_HAS_RSP_1 0x2 /* has 2 equivalent offcore_rsp regs */
|
||||
#define PMU_FL_EXCL_CNTRS 0x4 /* has exclusive counter requirements */
|
||||
#define PMU_FL_EXCL_ENABLED 0x8 /* exclusive counter active */
|
||||
|
||||
#define EVENT_VAR(_id) event_attr_##_id
|
||||
#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
|
||||
|
||||
#define EVENT_ATTR(_name, _id) \
|
||||
static struct perf_pmu_events_attr EVENT_VAR(_id) = { \
|
||||
.attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \
|
||||
.id = PERF_COUNT_HW_##_id, \
|
||||
.event_str = NULL, \
|
||||
};
|
||||
|
||||
#define EVENT_ATTR_STR(_name, v, str) \
|
||||
static struct perf_pmu_events_attr event_attr_##v = { \
|
||||
.attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \
|
||||
.id = 0, \
|
||||
.event_str = str, \
|
||||
};
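/*
 * Illustrative sketch, not part of the original file: how a model
 * source file publishes a named event alias in sysfs with
 * EVENT_ATTR_STR(). The encoding string is only an example and is
 * model specific; 'example_mem_loads' is a hypothetical name.
 * events_sysfs_show() is forward declared here only so the sketch is
 * self contained.
 */
ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
			  char *page);

EVENT_ATTR_STR(mem-loads, example_mem_loads, "event=0x0b,umask=0x10,ldlat=3");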
extern struct x86_pmu x86_pmu __read_mostly;
|
||||
|
||||
static inline bool x86_pmu_has_lbr_callstack(void)
|
||||
{
|
||||
return x86_pmu.lbr_sel_map &&
|
||||
x86_pmu.lbr_sel_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] > 0;
|
||||
}
|
||||
|
||||
DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
|
||||
|
||||
int x86_perf_event_set_period(struct perf_event *event);
|
||||
|
||||
/*
|
||||
* Generalized hw caching related hw_event table, filled
|
||||
* in on a per model basis. A value of 0 means
|
||||
* 'not supported', -1 means 'hw_event makes no sense on
|
||||
* this CPU', any other value means the raw hw_event
|
||||
* ID.
|
||||
*/
|
||||
|
||||
#define C(x) PERF_COUNT_HW_CACHE_##x
|
||||
|
||||
extern u64 __read_mostly hw_cache_event_ids
|
||||
[PERF_COUNT_HW_CACHE_MAX]
|
||||
[PERF_COUNT_HW_CACHE_OP_MAX]
|
||||
[PERF_COUNT_HW_CACHE_RESULT_MAX];
|
||||
extern u64 __read_mostly hw_cache_extra_regs
|
||||
[PERF_COUNT_HW_CACHE_MAX]
|
||||
[PERF_COUNT_HW_CACHE_OP_MAX]
|
||||
[PERF_COUNT_HW_CACHE_RESULT_MAX];
|
||||
|
||||
u64 x86_perf_event_update(struct perf_event *event);
|
||||
|
||||
static inline unsigned int x86_pmu_config_addr(int index)
|
||||
{
|
||||
return x86_pmu.eventsel + (x86_pmu.addr_offset ?
|
||||
x86_pmu.addr_offset(index, true) : index);
|
||||
}
|
||||
|
||||
static inline unsigned int x86_pmu_event_addr(int index)
|
||||
{
|
||||
return x86_pmu.perfctr + (x86_pmu.addr_offset ?
|
||||
x86_pmu.addr_offset(index, false) : index);
|
||||
}
|
||||
|
||||
static inline int x86_pmu_rdpmc_index(int index)
|
||||
{
|
||||
return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index;
|
||||
}
|
||||
|
||||
int x86_add_exclusive(unsigned int what);
|
||||
|
||||
void x86_del_exclusive(unsigned int what);
|
||||
|
||||
int x86_reserve_hardware(void);
|
||||
|
||||
void x86_release_hardware(void);
|
||||
|
||||
void hw_perf_lbr_event_destroy(struct perf_event *event);
|
||||
|
||||
int x86_setup_perfctr(struct perf_event *event);
|
||||
|
||||
int x86_pmu_hw_config(struct perf_event *event);
|
||||
|
||||
void x86_pmu_disable_all(void);
|
||||
|
||||
static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
|
||||
u64 enable_mask)
|
||||
{
|
||||
u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask);
|
||||
|
||||
if (hwc->extra_reg.reg)
|
||||
wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
|
||||
wrmsrl(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask);
|
||||
}
|
||||
|
||||
void x86_pmu_enable_all(int added);
|
||||
|
||||
int perf_assign_events(struct event_constraint **constraints, int n,
|
||||
int wmin, int wmax, int gpmax, int *assign);
|
||||
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
|
||||
|
||||
void x86_pmu_stop(struct perf_event *event, int flags);
|
||||
|
||||
static inline void x86_pmu_disable_event(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
|
||||
wrmsrl(hwc->config_base, hwc->config);
|
||||
}
|
||||
|
||||
void x86_pmu_enable_event(struct perf_event *event);
|
||||
|
||||
int x86_pmu_handle_irq(struct pt_regs *regs);
|
||||
|
||||
extern struct event_constraint emptyconstraint;
|
||||
|
||||
extern struct event_constraint unconstrained;
|
||||
|
||||
static inline bool kernel_ip(unsigned long ip)
|
||||
{
|
||||
#ifdef CONFIG_X86_32
|
||||
return ip > PAGE_OFFSET;
|
||||
#else
|
||||
return (long)ip < 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* Not all PMUs provide the right context information to place the reported IP
|
||||
* into full context. Specifically segment registers are typically not
|
||||
* supplied.
|
||||
*
|
||||
* Assuming the address is a linear address (it is for IBS), we fake the CS and
|
||||
* vm86 mode using the known zero-based code segment and 'fix up' the registers
|
||||
* to reflect this.
|
||||
*
|
||||
* Intel PEBS/LBR appear to typically provide the effective address, nothing
|
||||
* much we can do about that but pray and treat it like a linear address.
|
||||
*/
|
||||
static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip)
|
||||
{
|
||||
regs->cs = kernel_ip(ip) ? __KERNEL_CS : __USER_CS;
|
||||
if (regs->flags & X86_VM_MASK)
|
||||
regs->flags ^= (PERF_EFLAGS_VM | X86_VM_MASK);
|
||||
regs->ip = ip;
|
||||
}
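/*
 * Illustrative sketch, not part of the original file: a PMI handler
 * that has a linear sample address typically works on a local copy of
 * the interrupted registers and patches the IP in, roughly as the IBS
 * and PEBS drain paths do. example_report_sample_ip() is a hypothetical
 * helper and 'sample_ip' a placeholder value.
 */
static inline void example_report_sample_ip(struct pt_regs *iregs,
					    unsigned long sample_ip)
{
	struct pt_regs regs = *iregs;

	set_linear_ip(&regs, sample_ip);
	/* ... 'regs' would then be handed to the perf output code ... */
}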
ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event);
|
||||
ssize_t intel_event_sysfs_show(char *page, u64 config);
|
||||
|
||||
struct attribute **merge_attr(struct attribute **a, struct attribute **b);
|
||||
|
||||
#ifdef CONFIG_CPU_SUP_AMD
|
||||
|
||||
int amd_pmu_init(void);
|
||||
|
||||
#else /* CONFIG_CPU_SUP_AMD */
|
||||
|
||||
static inline int amd_pmu_init(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* CONFIG_CPU_SUP_AMD */
|
||||
|
||||
#ifdef CONFIG_CPU_SUP_INTEL
|
||||
|
||||
static inline bool intel_pmu_has_bts(struct perf_event *event)
|
||||
{
|
||||
if (event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
|
||||
!event->attr.freq && event->hw.sample_period == 1)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
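/*
 * Illustrative sketch, not part of the original file: the kind of
 * attribute that intel_pmu_has_bts() accepts, i.e. what
 * "perf record -e branches -c 1" ends up requesting (a fixed period of
 * one branch, not frequency mode). 'example_bts_attr' is a hypothetical
 * name.
 */
static struct perf_event_attr example_bts_attr __maybe_unused = {
	.type		= PERF_TYPE_HARDWARE,
	.config		= PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
	.sample_period	= 1,
};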
int intel_pmu_save_and_restart(struct perf_event *event);
|
||||
|
||||
struct event_constraint *
|
||||
x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
|
||||
struct perf_event *event);
|
||||
|
||||
struct intel_shared_regs *allocate_shared_regs(int cpu);
|
||||
|
||||
int intel_pmu_init(void);
|
||||
|
||||
void init_debug_store_on_cpu(int cpu);
|
||||
|
||||
void fini_debug_store_on_cpu(int cpu);
|
||||
|
||||
void release_ds_buffers(void);
|
||||
|
||||
void reserve_ds_buffers(void);
|
||||
|
||||
extern struct event_constraint bts_constraint;
|
||||
|
||||
void intel_pmu_enable_bts(u64 config);
|
||||
|
||||
void intel_pmu_disable_bts(void);
|
||||
|
||||
int intel_pmu_drain_bts_buffer(void);
|
||||
|
||||
extern struct event_constraint intel_core2_pebs_event_constraints[];
|
||||
|
||||
extern struct event_constraint intel_atom_pebs_event_constraints[];
|
||||
|
||||
extern struct event_constraint intel_slm_pebs_event_constraints[];
|
||||
|
||||
extern struct event_constraint intel_nehalem_pebs_event_constraints[];
|
||||
|
||||
extern struct event_constraint intel_westmere_pebs_event_constraints[];
|
||||
|
||||
extern struct event_constraint intel_snb_pebs_event_constraints[];
|
||||
|
||||
extern struct event_constraint intel_ivb_pebs_event_constraints[];
|
||||
|
||||
extern struct event_constraint intel_hsw_pebs_event_constraints[];
|
||||
|
||||
extern struct event_constraint intel_skl_pebs_event_constraints[];
|
||||
|
||||
struct event_constraint *intel_pebs_constraints(struct perf_event *event);
|
||||
|
||||
void intel_pmu_pebs_enable(struct perf_event *event);
|
||||
|
||||
void intel_pmu_pebs_disable(struct perf_event *event);
|
||||
|
||||
void intel_pmu_pebs_enable_all(void);
|
||||
|
||||
void intel_pmu_pebs_disable_all(void);
|
||||
|
||||
void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in);
|
||||
|
||||
void intel_ds_init(void);
|
||||
|
||||
void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
|
||||
|
||||
void intel_pmu_lbr_reset(void);
|
||||
|
||||
void intel_pmu_lbr_enable(struct perf_event *event);
|
||||
|
||||
void intel_pmu_lbr_disable(struct perf_event *event);
|
||||
|
||||
void intel_pmu_lbr_enable_all(bool pmi);
|
||||
|
||||
void intel_pmu_lbr_disable_all(void);
|
||||
|
||||
void intel_pmu_lbr_read(void);
|
||||
|
||||
void intel_pmu_lbr_init_core(void);
|
||||
|
||||
void intel_pmu_lbr_init_nhm(void);
|
||||
|
||||
void intel_pmu_lbr_init_atom(void);
|
||||
|
||||
void intel_pmu_lbr_init_snb(void);
|
||||
|
||||
void intel_pmu_lbr_init_hsw(void);
|
||||
|
||||
void intel_pmu_lbr_init_skl(void);
|
||||
|
||||
void intel_pmu_lbr_init_knl(void);
|
||||
|
||||
int intel_pmu_setup_lbr_filter(struct perf_event *event);
|
||||
|
||||
void intel_pt_interrupt(void);
|
||||
|
||||
int intel_bts_interrupt(void);
|
||||
|
||||
void intel_bts_enable_local(void);
|
||||
|
||||
void intel_bts_disable_local(void);
|
||||
|
||||
int p4_pmu_init(void);
|
||||
|
||||
int p6_pmu_init(void);
|
||||
|
||||
int knc_pmu_init(void);
|
||||
|
||||
ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
|
||||
char *page);
|
||||
|
||||
static inline int is_ht_workaround_enabled(void)
|
||||
{
|
||||
return !!(x86_pmu.flags & PMU_FL_EXCL_ENABLED);
|
||||
}
|
||||
|
||||
#else /* CONFIG_CPU_SUP_INTEL */
|
||||
|
||||
static inline void reserve_ds_buffers(void)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void release_ds_buffers(void)
|
||||
{
|
||||
}
|
||||
|
||||
static inline int intel_pmu_init(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline struct intel_shared_regs *allocate_shared_regs(int cpu)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline int is_ht_workaround_enabled(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif /* CONFIG_CPU_SUP_INTEL */
|
@@ -1,731 +0,0 @@
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/slab.h>
|
||||
#include <asm/apicdef.h>
|
||||
|
||||
#include "perf_event.h"
|
||||
|
||||
static __initconst const u64 amd_hw_cache_event_ids
|
||||
[PERF_COUNT_HW_CACHE_MAX]
|
||||
[PERF_COUNT_HW_CACHE_OP_MAX]
|
||||
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
|
||||
{
|
||||
[ C(L1D) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
|
||||
[ C(RESULT_MISS) ] = 0x0141, /* Data Cache Misses */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0,
|
||||
[ C(RESULT_MISS) ] = 0,
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
|
||||
[ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */
|
||||
},
|
||||
},
|
||||
[ C(L1I ) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */
|
||||
[ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
|
||||
[ C(RESULT_MISS) ] = 0,
|
||||
},
|
||||
},
|
||||
[ C(LL ) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
|
||||
[ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
|
||||
[ C(RESULT_MISS) ] = 0,
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0,
|
||||
[ C(RESULT_MISS) ] = 0,
|
||||
},
|
||||
},
|
||||
[ C(DTLB) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
|
||||
[ C(RESULT_MISS) ] = 0x0746, /* L1_DTLB_AND_L2_DLTB_MISS.ALL */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0,
|
||||
[ C(RESULT_MISS) ] = 0,
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0,
|
||||
[ C(RESULT_MISS) ] = 0,
|
||||
},
|
||||
},
|
||||
[ C(ITLB) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */
|
||||
[ C(RESULT_MISS) ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
},
|
||||
[ C(BPU ) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */
|
||||
[ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
},
|
||||
[ C(NODE) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0xb8e9, /* CPU Request to Memory, l+r */
|
||||
[ C(RESULT_MISS) ] = 0x98e9, /* CPU Request to Memory, r */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
},
|
||||
};
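/*
 * Illustrative sketch, not part of the original file: how a generic
 * cache event resolves through the table above, e.g. the generalized
 * L1-dcache-load-misses event maps to raw event 0x0141.
 * example_l1d_read_miss() is a hypothetical helper.
 */
static inline u64 example_l1d_read_miss(void)
{
	return amd_hw_cache_event_ids[C(L1D)][C(OP_READ)][C(RESULT_MISS)];
}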
/*
|
||||
* AMD Performance Monitor K7 and later.
|
||||
*/
|
||||
static const u64 amd_perfmon_event_map[] =
|
||||
{
|
||||
[PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
|
||||
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
|
||||
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080,
|
||||
[PERF_COUNT_HW_CACHE_MISSES] = 0x0081,
|
||||
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
|
||||
[PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
|
||||
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */
|
||||
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */
|
||||
};
|
||||
|
||||
static u64 amd_pmu_event_map(int hw_event)
|
||||
{
|
||||
return amd_perfmon_event_map[hw_event];
|
||||
}
|
||||
|
||||
/*
|
||||
* Previously calculated offsets
|
||||
*/
|
||||
static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly;
|
||||
static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly;
|
||||
|
||||
/*
|
||||
* Legacy CPUs:
|
||||
* 4 counters starting at 0xc0010000 each offset by 1
|
||||
*
|
||||
* CPUs with core performance counter extensions:
|
||||
* 6 counters starting at 0xc0010200 each offset by 2
|
||||
*/
|
||||
static inline int amd_pmu_addr_offset(int index, bool eventsel)
|
||||
{
|
||||
int offset;
|
||||
|
||||
if (!index)
|
||||
return index;
|
||||
|
||||
if (eventsel)
|
||||
offset = event_offsets[index];
|
||||
else
|
||||
offset = count_offsets[index];
|
||||
|
||||
if (offset)
|
||||
return offset;
|
||||
|
||||
if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
|
||||
offset = index;
|
||||
else
|
||||
offset = index << 1;
|
||||
|
||||
if (eventsel)
|
||||
event_offsets[index] = offset;
|
||||
else
|
||||
count_offsets[index] = offset;
|
||||
|
||||
return offset;
|
||||
}
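/*
 * Illustrative sketch, not part of the original file: what the offsets
 * above mean for counter 2. On a legacy CPU the eventsel/counter pair
 * is MSR_K7_EVNTSEL2/MSR_K7_PERFCTR2 (0xc0010002/0xc0010006); with the
 * core performance counter extensions it is MSR_F15H_PERF_CTL + 4 and
 * MSR_F15H_PERF_CTR + 4 (0xc0010204/0xc0010205), since the offset is
 * index << 1. example_amd_counter2_addrs() is a hypothetical helper.
 */
static inline void example_amd_counter2_addrs(unsigned int *sel, unsigned int *ctr)
{
	*sel = x86_pmu_config_addr(2);	/* x86_pmu.eventsel + addr_offset(2, true)  */
	*ctr = x86_pmu_event_addr(2);	/* x86_pmu.perfctr  + addr_offset(2, false) */
}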
static int amd_core_hw_config(struct perf_event *event)
|
||||
{
|
||||
if (event->attr.exclude_host && event->attr.exclude_guest)
|
||||
/*
|
||||
* When HO == GO == 1 the hardware treats that as GO == HO == 0
|
||||
* and will count in both modes. We don't want to count in that
|
||||
* case so we emulate no-counting by setting US = OS = 0.
|
||||
*/
|
||||
event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
|
||||
ARCH_PERFMON_EVENTSEL_OS);
|
||||
else if (event->attr.exclude_host)
|
||||
event->hw.config |= AMD64_EVENTSEL_GUESTONLY;
|
||||
else if (event->attr.exclude_guest)
|
||||
event->hw.config |= AMD64_EVENTSEL_HOSTONLY;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* AMD64 events are detected based on their event codes.
|
||||
*/
|
||||
static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
|
||||
{
|
||||
return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff);
|
||||
}
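/*
 * Illustrative sketch, not part of the original file: AMD event selects
 * carry a 12 bit event code, bits [7:0] in config[7:0] and bits [11:8]
 * in config[35:32], which is what the extraction above undoes. A raw
 * config could be built the other way around like this
 * (example_amd_raw_config() is a hypothetical helper):
 */
static inline u64 example_amd_raw_config(unsigned int event_code, unsigned int umask)
{
	return ((u64)(event_code & 0xf00) << 24) |	/* event[11:8] -> config[35:32] */
	       ((umask & 0xff) << 8) |			/* umask       -> config[15:8]  */
	       (event_code & 0xff);			/* event[7:0]  -> config[7:0]   */
}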
static inline int amd_is_nb_event(struct hw_perf_event *hwc)
|
||||
{
|
||||
return (hwc->config & 0xe0) == 0xe0;
|
||||
}
|
||||
|
||||
static inline int amd_has_nb(struct cpu_hw_events *cpuc)
|
||||
{
|
||||
struct amd_nb *nb = cpuc->amd_nb;
|
||||
|
||||
return nb && nb->nb_id != -1;
|
||||
}
|
||||
|
||||
static int amd_pmu_hw_config(struct perf_event *event)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/* pass precise event sampling to ibs: */
|
||||
if (event->attr.precise_ip && get_ibs_caps())
|
||||
return -ENOENT;
|
||||
|
||||
if (has_branch_stack(event))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
ret = x86_pmu_hw_config(event);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (event->attr.type == PERF_TYPE_RAW)
|
||||
event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
|
||||
|
||||
return amd_core_hw_config(event);
|
||||
}
|
||||
|
||||
static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
|
||||
struct perf_event *event)
|
||||
{
|
||||
struct amd_nb *nb = cpuc->amd_nb;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* need to scan whole list because event may not have
|
||||
* been assigned during scheduling
|
||||
*
|
||||
* no race condition possible because event can only
|
||||
* be removed on one CPU at a time AND PMU is disabled
|
||||
* when we come here
|
||||
*/
|
||||
for (i = 0; i < x86_pmu.num_counters; i++) {
|
||||
if (cmpxchg(nb->owners + i, event, NULL) == event)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* AMD64 NorthBridge events need special treatment because
|
||||
* counter access needs to be synchronized across all cores
|
||||
* of a package. Refer to BKDG section 3.12
|
||||
*
|
||||
* NB events are events measuring L3 cache, Hypertransport
|
||||
* traffic. They are identified by an event code >= 0xe00.
|
||||
* They measure events on the NorthBridge, which is shared
|
||||
* by all cores on a package. NB events are counted on a
|
||||
* shared set of counters. When a NB event is programmed
|
||||
* in a counter, the data actually comes from a shared
|
||||
* counter. Thus, access to those counters needs to be
|
||||
* synchronized.
|
||||
*
|
||||
* We implement the synchronization such that no two cores
|
||||
* can be measuring NB events using the same counters. Thus,
|
||||
* we maintain a per-NB allocation table. The available slot
|
||||
* is propagated using the event_constraint structure.
|
||||
*
|
||||
* We provide only one choice for each NB event based on
|
||||
* the fact that only NB events have restrictions. Consequently,
|
||||
* if a counter is available, there is a guarantee the NB event
|
||||
* will be assigned to it. If no slot is available, an empty
|
||||
* constraint is returned and scheduling will eventually fail
|
||||
* for this event.
|
||||
*
|
||||
* Note that all cores attached to the same NB compete for the same
* counters to host NB events; this is why we use atomic ops. Some
* multi-chip CPUs may have more than one NB.
|
||||
*
|
||||
* Given that resources are allocated (cmpxchg), they must be
|
||||
* eventually freed for others to use. This is accomplished by
|
||||
* calling __amd_put_nb_event_constraints()
|
||||
*
|
||||
* Non NB events are not impacted by this restriction.
|
||||
*/
|
||||
static struct event_constraint *
|
||||
__amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
|
||||
struct event_constraint *c)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
struct amd_nb *nb = cpuc->amd_nb;
|
||||
struct perf_event *old;
|
||||
int idx, new = -1;
|
||||
|
||||
if (!c)
|
||||
c = &unconstrained;
|
||||
|
||||
if (cpuc->is_fake)
|
||||
return c;
|
||||
|
||||
/*
|
||||
* detect if already present, if so reuse
|
||||
*
|
||||
* cannot merge with actual allocation
|
||||
* because of possible holes
|
||||
*
|
||||
* event can already be present yet not assigned (in hwc->idx)
|
||||
* because of successive calls to x86_schedule_events() from
|
||||
* hw_perf_group_sched_in() without hw_perf_enable()
|
||||
*/
|
||||
for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) {
|
||||
if (new == -1 || hwc->idx == idx)
|
||||
/* assign free slot, prefer hwc->idx */
|
||||
old = cmpxchg(nb->owners + idx, NULL, event);
|
||||
else if (nb->owners[idx] == event)
|
||||
/* event already present */
|
||||
old = event;
|
||||
else
|
||||
continue;
|
||||
|
||||
if (old && old != event)
|
||||
continue;
|
||||
|
||||
/* reassign to this slot */
|
||||
if (new != -1)
|
||||
cmpxchg(nb->owners + new, event, NULL);
|
||||
new = idx;
|
||||
|
||||
/* already present, reuse */
|
||||
if (old == event)
|
||||
break;
|
||||
}
|
||||
|
||||
if (new == -1)
|
||||
return &emptyconstraint;
|
||||
|
||||
return &nb->event_constraints[new];
|
||||
}
|
||||
|
||||
static struct amd_nb *amd_alloc_nb(int cpu)
|
||||
{
|
||||
struct amd_nb *nb;
|
||||
int i;
|
||||
|
||||
nb = kzalloc_node(sizeof(struct amd_nb), GFP_KERNEL, cpu_to_node(cpu));
|
||||
if (!nb)
|
||||
return NULL;
|
||||
|
||||
nb->nb_id = -1;
|
||||
|
||||
/*
|
||||
* initialize all possible NB constraints
|
||||
*/
|
||||
for (i = 0; i < x86_pmu.num_counters; i++) {
|
||||
__set_bit(i, nb->event_constraints[i].idxmsk);
|
||||
nb->event_constraints[i].weight = 1;
|
||||
}
|
||||
return nb;
|
||||
}
|
||||
|
||||
static int amd_pmu_cpu_prepare(int cpu)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
|
||||
|
||||
WARN_ON_ONCE(cpuc->amd_nb);
|
||||
|
||||
if (boot_cpu_data.x86_max_cores < 2)
|
||||
return NOTIFY_OK;
|
||||
|
||||
cpuc->amd_nb = amd_alloc_nb(cpu);
|
||||
if (!cpuc->amd_nb)
|
||||
return NOTIFY_BAD;
|
||||
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
static void amd_pmu_cpu_starting(int cpu)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
|
||||
void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED];
|
||||
struct amd_nb *nb;
|
||||
int i, nb_id;
|
||||
|
||||
cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
|
||||
|
||||
if (boot_cpu_data.x86_max_cores < 2)
|
||||
return;
|
||||
|
||||
nb_id = amd_get_nb_id(cpu);
|
||||
WARN_ON_ONCE(nb_id == BAD_APICID);
|
||||
|
||||
for_each_online_cpu(i) {
|
||||
nb = per_cpu(cpu_hw_events, i).amd_nb;
|
||||
if (WARN_ON_ONCE(!nb))
|
||||
continue;
|
||||
|
||||
if (nb->nb_id == nb_id) {
|
||||
*onln = cpuc->amd_nb;
|
||||
cpuc->amd_nb = nb;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
cpuc->amd_nb->nb_id = nb_id;
|
||||
cpuc->amd_nb->refcnt++;
|
||||
}
|
||||
|
||||
static void amd_pmu_cpu_dead(int cpu)
|
||||
{
|
||||
struct cpu_hw_events *cpuhw;
|
||||
|
||||
if (boot_cpu_data.x86_max_cores < 2)
|
||||
return;
|
||||
|
||||
cpuhw = &per_cpu(cpu_hw_events, cpu);
|
||||
|
||||
if (cpuhw->amd_nb) {
|
||||
struct amd_nb *nb = cpuhw->amd_nb;
|
||||
|
||||
if (nb->nb_id == -1 || --nb->refcnt == 0)
|
||||
kfree(nb);
|
||||
|
||||
cpuhw->amd_nb = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static struct event_constraint *
|
||||
amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
|
||||
struct perf_event *event)
|
||||
{
|
||||
/*
|
||||
* if not NB event or no NB, then no constraints
|
||||
*/
|
||||
if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)))
|
||||
return &unconstrained;
|
||||
|
||||
return __amd_get_nb_event_constraints(cpuc, event, NULL);
|
||||
}
|
||||
|
||||
static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
|
||||
struct perf_event *event)
|
||||
{
|
||||
if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))
|
||||
__amd_put_nb_event_constraints(cpuc, event);
|
||||
}
|
||||
|
||||
PMU_FORMAT_ATTR(event, "config:0-7,32-35");
|
||||
PMU_FORMAT_ATTR(umask, "config:8-15" );
|
||||
PMU_FORMAT_ATTR(edge, "config:18" );
|
||||
PMU_FORMAT_ATTR(inv, "config:23" );
|
||||
PMU_FORMAT_ATTR(cmask, "config:24-31" );
|
||||
|
||||
static struct attribute *amd_format_attr[] = {
|
||||
&format_attr_event.attr,
|
||||
&format_attr_umask.attr,
|
||||
&format_attr_edge.attr,
|
||||
&format_attr_inv.attr,
|
||||
&format_attr_cmask.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
/* AMD Family 15h */
|
||||
|
||||
#define AMD_EVENT_TYPE_MASK 0x000000F0ULL
|
||||
|
||||
#define AMD_EVENT_FP 0x00000000ULL ... 0x00000010ULL
|
||||
#define AMD_EVENT_LS 0x00000020ULL ... 0x00000030ULL
|
||||
#define AMD_EVENT_DC 0x00000040ULL ... 0x00000050ULL
|
||||
#define AMD_EVENT_CU 0x00000060ULL ... 0x00000070ULL
|
||||
#define AMD_EVENT_IC_DE 0x00000080ULL ... 0x00000090ULL
|
||||
#define AMD_EVENT_EX_LS 0x000000C0ULL
|
||||
#define AMD_EVENT_DE 0x000000D0ULL
|
||||
#define AMD_EVENT_NB 0x000000E0ULL ... 0x000000F0ULL
|
||||
|
||||
/*
|
||||
* AMD family 15h event code/PMC mappings:
|
||||
*
|
||||
* type = event_code & 0x0F0:
|
||||
*
|
||||
* 0x000 FP PERF_CTL[5:3]
|
||||
* 0x010 FP PERF_CTL[5:3]
|
||||
* 0x020 LS PERF_CTL[5:0]
|
||||
* 0x030 LS PERF_CTL[5:0]
|
||||
* 0x040 DC PERF_CTL[5:0]
|
||||
* 0x050 DC PERF_CTL[5:0]
|
||||
* 0x060 CU PERF_CTL[2:0]
|
||||
* 0x070 CU PERF_CTL[2:0]
|
||||
* 0x080 IC/DE PERF_CTL[2:0]
|
||||
* 0x090 IC/DE PERF_CTL[2:0]
|
||||
* 0x0A0 ---
|
||||
* 0x0B0 ---
|
||||
* 0x0C0 EX/LS PERF_CTL[5:0]
|
||||
* 0x0D0 DE PERF_CTL[2:0]
|
||||
* 0x0E0 NB NB_PERF_CTL[3:0]
|
||||
* 0x0F0 NB NB_PERF_CTL[3:0]
|
||||
*
|
||||
* Exceptions:
|
||||
*
|
||||
* 0x000 FP PERF_CTL[3], PERF_CTL[5:3] (*)
|
||||
* 0x003 FP PERF_CTL[3]
|
||||
* 0x004 FP PERF_CTL[3], PERF_CTL[5:3] (*)
|
||||
* 0x00B FP PERF_CTL[3]
|
||||
* 0x00D FP PERF_CTL[3]
|
||||
* 0x023 DE PERF_CTL[2:0]
|
||||
* 0x02D LS PERF_CTL[3]
|
||||
* 0x02E LS PERF_CTL[3,0]
|
||||
* 0x031 LS PERF_CTL[2:0] (**)
|
||||
* 0x043 CU PERF_CTL[2:0]
|
||||
* 0x045 CU PERF_CTL[2:0]
|
||||
* 0x046 CU PERF_CTL[2:0]
|
||||
* 0x054 CU PERF_CTL[2:0]
|
||||
* 0x055 CU PERF_CTL[2:0]
|
||||
* 0x08F IC PERF_CTL[0]
|
||||
* 0x187 DE PERF_CTL[0]
|
||||
* 0x188 DE PERF_CTL[0]
|
||||
* 0x0DB EX PERF_CTL[5:0]
|
||||
* 0x0DC LS PERF_CTL[5:0]
|
||||
* 0x0DD LS PERF_CTL[5:0]
|
||||
* 0x0DE LS PERF_CTL[5:0]
|
||||
* 0x0DF LS PERF_CTL[5:0]
|
||||
* 0x1C0 EX PERF_CTL[5:3]
|
||||
* 0x1D6 EX PERF_CTL[5:0]
|
||||
* 0x1D8 EX PERF_CTL[5:0]
|
||||
*
|
||||
* (*) depending on the umask all FPU counters may be used
|
||||
* (**) only one unitmask enabled at a time
|
||||
*/
|
||||
|
||||
static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0);
|
||||
static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
|
||||
static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0);
|
||||
static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
|
||||
static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
|
||||
static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
|
||||
|
||||
static struct event_constraint *
|
||||
amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx,
|
||||
struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
unsigned int event_code = amd_get_event_code(hwc);
|
||||
|
||||
switch (event_code & AMD_EVENT_TYPE_MASK) {
|
||||
case AMD_EVENT_FP:
|
||||
switch (event_code) {
|
||||
case 0x000:
|
||||
if (!(hwc->config & 0x0000F000ULL))
|
||||
break;
|
||||
if (!(hwc->config & 0x00000F00ULL))
|
||||
break;
|
||||
return &amd_f15_PMC3;
|
||||
case 0x004:
|
||||
if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
|
||||
break;
|
||||
return &amd_f15_PMC3;
|
||||
case 0x003:
|
||||
case 0x00B:
|
||||
case 0x00D:
|
||||
return &amd_f15_PMC3;
|
||||
}
|
||||
return &amd_f15_PMC53;
|
||||
case AMD_EVENT_LS:
|
||||
case AMD_EVENT_DC:
|
||||
case AMD_EVENT_EX_LS:
|
||||
switch (event_code) {
|
||||
case 0x023:
|
||||
case 0x043:
|
||||
case 0x045:
|
||||
case 0x046:
|
||||
case 0x054:
|
||||
case 0x055:
|
||||
return &amd_f15_PMC20;
|
||||
case 0x02D:
|
||||
return &amd_f15_PMC3;
|
||||
case 0x02E:
|
||||
return &amd_f15_PMC30;
|
||||
case 0x031:
|
||||
if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
|
||||
return &amd_f15_PMC20;
|
||||
return &emptyconstraint;
|
||||
case 0x1C0:
|
||||
return &amd_f15_PMC53;
|
||||
default:
|
||||
return &amd_f15_PMC50;
|
||||
}
|
||||
case AMD_EVENT_CU:
|
||||
case AMD_EVENT_IC_DE:
|
||||
case AMD_EVENT_DE:
|
||||
switch (event_code) {
|
||||
case 0x08F:
|
||||
case 0x187:
|
||||
case 0x188:
|
||||
return &amd_f15_PMC0;
|
||||
case 0x0DB ... 0x0DF:
|
||||
case 0x1D6:
|
||||
case 0x1D8:
|
||||
return &amd_f15_PMC50;
|
||||
default:
|
||||
return &amd_f15_PMC20;
|
||||
}
|
||||
case AMD_EVENT_NB:
|
||||
/* moved to perf_event_amd_uncore.c */
|
||||
return &emptyconstraint;
|
||||
default:
|
||||
return &emptyconstraint;
|
||||
}
|
||||
}
|
||||
|
||||
static ssize_t amd_event_sysfs_show(char *page, u64 config)
|
||||
{
|
||||
u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) |
|
||||
(config & AMD64_EVENTSEL_EVENT) >> 24;
|
||||
|
||||
return x86_event_sysfs_show(page, config, event);
|
||||
}
|
||||
|
||||
static __initconst const struct x86_pmu amd_pmu = {
|
||||
.name = "AMD",
|
||||
.handle_irq = x86_pmu_handle_irq,
|
||||
.disable_all = x86_pmu_disable_all,
|
||||
.enable_all = x86_pmu_enable_all,
|
||||
.enable = x86_pmu_enable_event,
|
||||
.disable = x86_pmu_disable_event,
|
||||
.hw_config = amd_pmu_hw_config,
|
||||
.schedule_events = x86_schedule_events,
|
||||
.eventsel = MSR_K7_EVNTSEL0,
|
||||
.perfctr = MSR_K7_PERFCTR0,
|
||||
.addr_offset = amd_pmu_addr_offset,
|
||||
.event_map = amd_pmu_event_map,
|
||||
.max_events = ARRAY_SIZE(amd_perfmon_event_map),
|
||||
.num_counters = AMD64_NUM_COUNTERS,
|
||||
.cntval_bits = 48,
|
||||
.cntval_mask = (1ULL << 48) - 1,
|
||||
.apic = 1,
|
||||
/* use highest bit to detect overflow */
|
||||
.max_period = (1ULL << 47) - 1,
|
||||
.get_event_constraints = amd_get_event_constraints,
|
||||
.put_event_constraints = amd_put_event_constraints,
|
||||
|
||||
.format_attrs = amd_format_attr,
|
||||
.events_sysfs_show = amd_event_sysfs_show,
|
||||
|
||||
.cpu_prepare = amd_pmu_cpu_prepare,
|
||||
.cpu_starting = amd_pmu_cpu_starting,
|
||||
.cpu_dead = amd_pmu_cpu_dead,
|
||||
};
|
||||
|
||||
static int __init amd_core_pmu_init(void)
|
||||
{
|
||||
if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
|
||||
return 0;
|
||||
|
||||
switch (boot_cpu_data.x86) {
|
||||
case 0x15:
|
||||
pr_cont("Fam15h ");
|
||||
x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
|
||||
break;
|
||||
|
||||
default:
|
||||
pr_err("core perfctr but no constraints; unknown hardware!\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
/*
|
||||
* If core performance counter extensions exist, we must use
|
||||
* MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also
|
||||
* amd_pmu_addr_offset().
|
||||
*/
|
||||
x86_pmu.eventsel = MSR_F15H_PERF_CTL;
|
||||
x86_pmu.perfctr = MSR_F15H_PERF_CTR;
|
||||
x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE;
|
||||
|
||||
pr_cont("core perfctr, ");
|
||||
return 0;
|
||||
}
|
||||
|
||||
__init int amd_pmu_init(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/* Performance-monitoring supported from K7 and later: */
|
||||
if (boot_cpu_data.x86 < 6)
|
||||
return -ENODEV;
|
||||
|
||||
x86_pmu = amd_pmu;
|
||||
|
||||
ret = amd_core_pmu_init();
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Events are common for all AMDs */
|
||||
memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
|
||||
sizeof(hw_cache_event_ids));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void amd_pmu_enable_virt(void)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
|
||||
cpuc->perf_ctr_virt_mask = 0;
|
||||
|
||||
/* Reload all events */
|
||||
x86_pmu_disable_all();
|
||||
x86_pmu_enable_all(0);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(amd_pmu_enable_virt);
|
||||
|
||||
void amd_pmu_disable_virt(void)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
|
||||
/*
|
||||
* We only mask out the Host-only bit so that host-only counting works
|
||||
* when SVM is disabled. If someone sets up a guest-only counter when
|
||||
* SVM is disabled the Guest-only bits still gets set and the counter
|
||||
* will not count anything.
|
||||
*/
|
||||
cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
|
||||
|
||||
/* Reload all events */
|
||||
x86_pmu_disable_all();
|
||||
x86_pmu_enable_all(0);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(amd_pmu_disable_virt);
|
@@ -1,959 +0,0 @@
|
||||
/*
|
||||
* Performance events - AMD IBS
|
||||
*
|
||||
* Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter
|
||||
*
|
||||
* For licencing details see kernel-base/COPYING
|
||||
*/
|
||||
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/ptrace.h>
|
||||
#include <linux/syscore_ops.h>
|
||||
|
||||
#include <asm/apic.h>
|
||||
|
||||
#include "perf_event.h"
|
||||
|
||||
static u32 ibs_caps;
|
||||
|
||||
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
|
||||
|
||||
#include <linux/kprobes.h>
|
||||
#include <linux/hardirq.h>
|
||||
|
||||
#include <asm/nmi.h>
|
||||
|
||||
#define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
|
||||
#define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT
|
||||
|
||||
enum ibs_states {
|
||||
IBS_ENABLED = 0,
|
||||
IBS_STARTED = 1,
|
||||
IBS_STOPPING = 2,
|
||||
|
||||
IBS_MAX_STATES,
|
||||
};
|
||||
|
||||
struct cpu_perf_ibs {
|
||||
struct perf_event *event;
|
||||
unsigned long state[BITS_TO_LONGS(IBS_MAX_STATES)];
|
||||
};
|
||||
|
||||
struct perf_ibs {
|
||||
struct pmu pmu;
|
||||
unsigned int msr;
|
||||
u64 config_mask;
|
||||
u64 cnt_mask;
|
||||
u64 enable_mask;
|
||||
u64 valid_mask;
|
||||
u64 max_period;
|
||||
unsigned long offset_mask[1];
|
||||
int offset_max;
|
||||
struct cpu_perf_ibs __percpu *pcpu;
|
||||
|
||||
struct attribute **format_attrs;
|
||||
struct attribute_group format_group;
|
||||
const struct attribute_group *attr_groups[2];
|
||||
|
||||
u64 (*get_count)(u64 config);
|
||||
};
|
||||
|
||||
struct perf_ibs_data {
|
||||
u32 size;
|
||||
union {
|
||||
u32 data[0]; /* data buffer starts here */
|
||||
u32 caps;
|
||||
};
|
||||
u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX];
|
||||
};
|
||||
|
||||
static int
|
||||
perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period)
|
||||
{
|
||||
s64 left = local64_read(&hwc->period_left);
|
||||
s64 period = hwc->sample_period;
|
||||
int overflow = 0;
|
||||
|
||||
/*
|
||||
* If we are way outside a reasonable range then just skip forward:
|
||||
*/
|
||||
if (unlikely(left <= -period)) {
|
||||
left = period;
|
||||
local64_set(&hwc->period_left, left);
|
||||
hwc->last_period = period;
|
||||
overflow = 1;
|
||||
}
|
||||
|
||||
if (unlikely(left < (s64)min)) {
|
||||
left += period;
|
||||
local64_set(&hwc->period_left, left);
|
||||
hwc->last_period = period;
|
||||
overflow = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the hw period that triggers the sw overflow is too short
|
||||
* we might hit the irq handler. This biases the results.
|
||||
* Thus we shorten the next-to-last period and set the last
|
||||
* period to the max period.
|
||||
*/
|
||||
if (left > max) {
|
||||
left -= max;
|
||||
if (left > max)
|
||||
left = max;
|
||||
else if (left < min)
|
||||
left = min;
|
||||
}
|
||||
|
||||
*hw_period = (u64)left;
|
||||
|
||||
return overflow;
|
||||
}
|
||||
|
||||
static int
|
||||
perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
int shift = 64 - width;
|
||||
u64 prev_raw_count;
|
||||
u64 delta;
|
||||
|
||||
/*
|
||||
* Careful: an NMI might modify the previous event value.
|
||||
*
|
||||
* Our tactic to handle this is to first atomically read and
|
||||
* exchange a new raw count - then add that new-prev delta
|
||||
* count to the generic event atomically:
|
||||
*/
|
||||
prev_raw_count = local64_read(&hwc->prev_count);
|
||||
if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
|
||||
new_raw_count) != prev_raw_count)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Now we have the new raw value and have updated the prev
|
||||
* timestamp already. We can now calculate the elapsed delta
|
||||
* (event-)time and add that to the generic event.
|
||||
*
|
||||
* Careful, not all hw sign-extends above the physical width
|
||||
* of the count.
|
||||
*/
|
||||
delta = (new_raw_count << shift) - (prev_raw_count << shift);
|
||||
delta >>= shift;
|
||||
|
||||
local64_add(delta, &event->count);
|
||||
local64_sub(delta, &hwc->period_left);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static struct perf_ibs perf_ibs_fetch;
|
||||
static struct perf_ibs perf_ibs_op;
|
||||
|
||||
static struct perf_ibs *get_ibs_pmu(int type)
|
||||
{
|
||||
if (perf_ibs_fetch.pmu.type == type)
|
||||
return &perf_ibs_fetch;
|
||||
if (perf_ibs_op.pmu.type == type)
|
||||
return &perf_ibs_op;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Use IBS for precise event sampling:
|
||||
*
|
||||
* perf record -a -e cpu-cycles:p ... # use ibs op counting cycle count
|
||||
* perf record -a -e r076:p ... # same as -e cpu-cycles:p
|
||||
* perf record -a -e r0C1:p ... # use ibs op counting micro-ops
|
||||
*
|
||||
* IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
|
||||
* MSRC001_1033) is used to select either cycle or micro-ops counting
|
||||
* mode.
|
||||
*
|
||||
* The rip of IBS samples has skid 0. Thus, IBS supports precise
|
||||
* levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the
|
||||
* rip is invalid when IBS was not able to record the rip correctly.
|
||||
* We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
|
||||
*
|
||||
*/
|
||||
static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
|
||||
{
|
||||
switch (event->attr.precise_ip) {
|
||||
case 0:
|
||||
return -ENOENT;
|
||||
case 1:
|
||||
case 2:
|
||||
break;
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
switch (event->attr.type) {
|
||||
case PERF_TYPE_HARDWARE:
|
||||
switch (event->attr.config) {
|
||||
case PERF_COUNT_HW_CPU_CYCLES:
|
||||
*config = 0;
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
case PERF_TYPE_RAW:
|
||||
switch (event->attr.config) {
|
||||
case 0x0076:
|
||||
*config = 0;
|
||||
return 0;
|
||||
case 0x00C1:
|
||||
*config = IBS_OP_CNT_CTL;
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
return -EOPNOTSUPP;
|
||||
}
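/*
 * Illustrative sketch, not part of the original file: the attribute
 * that "perf record -e cpu-cycles:p" boils down to, which the function
 * above redirects to the IBS op PMU with config = 0 (cycle counting
 * mode). 'example_ibs_cycles_attr' is a hypothetical name and the
 * period is arbitrary.
 */
static struct perf_event_attr example_ibs_cycles_attr __maybe_unused = {
	.type		= PERF_TYPE_HARDWARE,
	.config		= PERF_COUNT_HW_CPU_CYCLES,
	.sample_period	= 100000,
	.precise_ip	= 1,
};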
static const struct perf_event_attr ibs_notsupp = {
|
||||
.exclude_user = 1,
|
||||
.exclude_kernel = 1,
|
||||
.exclude_hv = 1,
|
||||
.exclude_idle = 1,
|
||||
.exclude_host = 1,
|
||||
.exclude_guest = 1,
|
||||
};
|
||||
|
||||
static int perf_ibs_init(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
struct perf_ibs *perf_ibs;
|
||||
u64 max_cnt, config;
|
||||
int ret;
|
||||
|
||||
perf_ibs = get_ibs_pmu(event->attr.type);
|
||||
if (perf_ibs) {
|
||||
config = event->attr.config;
|
||||
} else {
|
||||
perf_ibs = &perf_ibs_op;
|
||||
ret = perf_ibs_precise_event(event, &config);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (event->pmu != &perf_ibs->pmu)
|
||||
return -ENOENT;
|
||||
|
||||
if (perf_flags(&event->attr) & perf_flags(&ibs_notsupp))
|
||||
return -EINVAL;
|
||||
|
||||
if (config & ~perf_ibs->config_mask)
|
||||
return -EINVAL;
|
||||
|
||||
if (hwc->sample_period) {
|
||||
if (config & perf_ibs->cnt_mask)
|
||||
/* raw max_cnt may not be set */
|
||||
return -EINVAL;
|
||||
if (!event->attr.sample_freq && hwc->sample_period & 0x0f)
|
||||
/*
|
||||
* the lower 4 bits cannot be set in the IBS max cnt field,
* but allow them here, since the sample period may still be
* adjusted when a sample frequency is used.
|
||||
*/
|
||||
return -EINVAL;
|
||||
hwc->sample_period &= ~0x0FULL;
|
||||
if (!hwc->sample_period)
|
||||
hwc->sample_period = 0x10;
|
||||
} else {
|
||||
max_cnt = config & perf_ibs->cnt_mask;
|
||||
config &= ~perf_ibs->cnt_mask;
|
||||
event->attr.sample_period = max_cnt << 4;
|
||||
hwc->sample_period = event->attr.sample_period;
|
||||
}
|
||||
|
||||
if (!hwc->sample_period)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* If we modify hwc->sample_period, we also need to update
|
||||
* hwc->last_period and hwc->period_left.
|
||||
*/
|
||||
hwc->last_period = hwc->sample_period;
|
||||
local64_set(&hwc->period_left, hwc->sample_period);
|
||||
|
||||
hwc->config_base = perf_ibs->msr;
|
||||
hwc->config = config;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
|
||||
struct hw_perf_event *hwc, u64 *period)
|
||||
{
|
||||
int overflow;
|
||||
|
||||
/* ignore lower 4 bits in min count: */
|
||||
overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period);
|
||||
local64_set(&hwc->prev_count, 0);
|
||||
|
||||
return overflow;
|
||||
}
|
||||
|
||||
static u64 get_ibs_fetch_count(u64 config)
|
||||
{
|
||||
return (config & IBS_FETCH_CNT) >> 12;
|
||||
}
|
||||
|
||||
static u64 get_ibs_op_count(u64 config)
|
||||
{
|
||||
u64 count = 0;
|
||||
|
||||
if (config & IBS_OP_VAL)
|
||||
count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */
|
||||
|
||||
if (ibs_caps & IBS_CAPS_RDWROPCNT)
|
||||
count += (config & IBS_OP_CUR_CNT) >> 32;
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static void
|
||||
perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
|
||||
u64 *config)
|
||||
{
|
||||
u64 count = perf_ibs->get_count(*config);
|
||||
|
||||
/*
|
||||
* Set width to 64 since we do not overflow on max width but
|
||||
* instead on max count. In perf_ibs_set_period() we clear
|
||||
* prev count manually on overflow.
|
||||
*/
|
||||
while (!perf_event_try_update(event, count, 64)) {
|
||||
rdmsrl(event->hw.config_base, *config);
|
||||
count = perf_ibs->get_count(*config);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
|
||||
struct hw_perf_event *hwc, u64 config)
|
||||
{
|
||||
wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
|
||||
}
|
||||
|
||||
/*
|
||||
* Erratum #420 Instruction-Based Sampling Engine May Generate
|
||||
* Interrupt that Cannot Be Cleared:
|
||||
*
|
||||
* Must clear counter mask first, then clear the enable bit. See
|
||||
* Revision Guide for AMD Family 10h Processors, Publication #41322.
|
||||
*/
|
||||
static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
|
||||
struct hw_perf_event *hwc, u64 config)
|
||||
{
|
||||
config &= ~perf_ibs->cnt_mask;
|
||||
wrmsrl(hwc->config_base, config);
|
||||
config &= ~perf_ibs->enable_mask;
|
||||
wrmsrl(hwc->config_base, config);
|
||||
}
|
||||
|
||||
/*
|
||||
* We cannot restore the ibs pmu state, so we always need to update
* the event while stopping it and then reset the state when starting
* again. Thus, we ignore the PERF_EF_RELOAD and PERF_EF_UPDATE flags
* in perf_ibs_start()/perf_ibs_stop() and instead always do it.
|
||||
*/
|
||||
static void perf_ibs_start(struct perf_event *event, int flags)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
|
||||
struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
|
||||
u64 period;
|
||||
|
||||
if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
|
||||
return;
|
||||
|
||||
WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
|
||||
hwc->state = 0;
|
||||
|
||||
perf_ibs_set_period(perf_ibs, hwc, &period);
|
||||
set_bit(IBS_STARTED, pcpu->state);
|
||||
perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
|
||||
|
||||
perf_event_update_userpage(event);
|
||||
}
|
||||
|
||||
static void perf_ibs_stop(struct perf_event *event, int flags)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
|
||||
struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
|
||||
u64 config;
|
||||
int stopping;
|
||||
|
||||
stopping = test_and_clear_bit(IBS_STARTED, pcpu->state);
|
||||
|
||||
if (!stopping && (hwc->state & PERF_HES_UPTODATE))
|
||||
return;
|
||||
|
||||
rdmsrl(hwc->config_base, config);
|
||||
|
||||
if (stopping) {
|
||||
set_bit(IBS_STOPPING, pcpu->state);
|
||||
perf_ibs_disable_event(perf_ibs, hwc, config);
|
||||
WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
|
||||
hwc->state |= PERF_HES_STOPPED;
|
||||
}
|
||||
|
||||
if (hwc->state & PERF_HES_UPTODATE)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Clear the valid bit to not count rollovers on update; rollovers
|
||||
* are only updated in the irq handler.
|
||||
*/
|
||||
config &= ~perf_ibs->valid_mask;
|
||||
|
||||
perf_ibs_event_update(perf_ibs, event, &config);
|
||||
hwc->state |= PERF_HES_UPTODATE;
|
||||
}
|
||||
|
||||
static int perf_ibs_add(struct perf_event *event, int flags)
|
||||
{
|
||||
struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
|
||||
struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
|
||||
|
||||
if (test_and_set_bit(IBS_ENABLED, pcpu->state))
|
||||
return -ENOSPC;
|
||||
|
||||
event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
|
||||
|
||||
pcpu->event = event;
|
||||
|
||||
if (flags & PERF_EF_START)
|
||||
perf_ibs_start(event, PERF_EF_RELOAD);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void perf_ibs_del(struct perf_event *event, int flags)
|
||||
{
|
||||
struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
|
||||
struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
|
||||
|
||||
if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
|
||||
return;
|
||||
|
||||
perf_ibs_stop(event, PERF_EF_UPDATE);
|
||||
|
||||
pcpu->event = NULL;
|
||||
|
||||
perf_event_update_userpage(event);
|
||||
}
|
||||
|
||||
static void perf_ibs_read(struct perf_event *event) { }
|
||||
|
||||
PMU_FORMAT_ATTR(rand_en, "config:57");
|
||||
PMU_FORMAT_ATTR(cnt_ctl, "config:19");
|
||||
|
||||
static struct attribute *ibs_fetch_format_attrs[] = {
|
||||
&format_attr_rand_en.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute *ibs_op_format_attrs[] = {
|
||||
NULL, /* &format_attr_cnt_ctl.attr if IBS_CAPS_OPCNT */
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct perf_ibs perf_ibs_fetch = {
|
||||
.pmu = {
|
||||
.task_ctx_nr = perf_invalid_context,
|
||||
|
||||
.event_init = perf_ibs_init,
|
||||
.add = perf_ibs_add,
|
||||
.del = perf_ibs_del,
|
||||
.start = perf_ibs_start,
|
||||
.stop = perf_ibs_stop,
|
||||
.read = perf_ibs_read,
|
||||
},
|
||||
.msr = MSR_AMD64_IBSFETCHCTL,
|
||||
.config_mask = IBS_FETCH_CONFIG_MASK,
|
||||
.cnt_mask = IBS_FETCH_MAX_CNT,
|
||||
.enable_mask = IBS_FETCH_ENABLE,
|
||||
.valid_mask = IBS_FETCH_VAL,
|
||||
.max_period = IBS_FETCH_MAX_CNT << 4,
|
||||
.offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK },
|
||||
.offset_max = MSR_AMD64_IBSFETCH_REG_COUNT,
|
||||
.format_attrs = ibs_fetch_format_attrs,
|
||||
|
||||
.get_count = get_ibs_fetch_count,
|
||||
};
|
||||
|
||||
static struct perf_ibs perf_ibs_op = {
|
||||
.pmu = {
|
||||
.task_ctx_nr = perf_invalid_context,
|
||||
|
||||
.event_init = perf_ibs_init,
|
||||
.add = perf_ibs_add,
|
||||
.del = perf_ibs_del,
|
||||
.start = perf_ibs_start,
|
||||
.stop = perf_ibs_stop,
|
||||
.read = perf_ibs_read,
|
||||
},
|
||||
.msr = MSR_AMD64_IBSOPCTL,
|
||||
.config_mask = IBS_OP_CONFIG_MASK,
|
||||
.cnt_mask = IBS_OP_MAX_CNT,
|
||||
.enable_mask = IBS_OP_ENABLE,
|
||||
.valid_mask = IBS_OP_VAL,
|
||||
.max_period = IBS_OP_MAX_CNT << 4,
|
||||
.offset_mask = { MSR_AMD64_IBSOP_REG_MASK },
|
||||
.offset_max = MSR_AMD64_IBSOP_REG_COUNT,
|
||||
.format_attrs = ibs_op_format_attrs,
|
||||
|
||||
.get_count = get_ibs_op_count,
|
||||
};
|
||||
|
||||
static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
|
||||
{
|
||||
struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
|
||||
struct perf_event *event = pcpu->event;
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
struct perf_sample_data data;
|
||||
struct perf_raw_record raw;
|
||||
struct pt_regs regs;
|
||||
struct perf_ibs_data ibs_data;
|
||||
int offset, size, check_rip, offset_max, throttle = 0;
|
||||
unsigned int msr;
|
||||
u64 *buf, *config, period;
|
||||
|
||||
if (!test_bit(IBS_STARTED, pcpu->state)) {
|
||||
/*
|
||||
* Catch spurious interrupts after stopping IBS: After
|
||||
* disabling IBS there could still be incoming NMIs
|
||||
* with samples that even have the valid bit cleared.
|
||||
* Mark all these NMIs as handled.
|
||||
*/
|
||||
return test_and_clear_bit(IBS_STOPPING, pcpu->state) ? 1 : 0;
|
||||
}
|
||||
|
||||
msr = hwc->config_base;
|
||||
buf = ibs_data.regs;
|
||||
rdmsrl(msr, *buf);
|
||||
if (!(*buf++ & perf_ibs->valid_mask))
|
||||
return 0;
|
||||
|
||||
config = &ibs_data.regs[0];
|
||||
perf_ibs_event_update(perf_ibs, event, config);
|
||||
perf_sample_data_init(&data, 0, hwc->last_period);
|
||||
if (!perf_ibs_set_period(perf_ibs, hwc, &period))
|
||||
goto out; /* no sw counter overflow */
|
||||
|
||||
ibs_data.caps = ibs_caps;
|
||||
size = 1;
|
||||
offset = 1;
|
||||
check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
|
||||
if (event->attr.sample_type & PERF_SAMPLE_RAW)
|
||||
offset_max = perf_ibs->offset_max;
|
||||
else if (check_rip)
|
||||
offset_max = 2;
|
||||
else
|
||||
offset_max = 1;
|
||||
do {
|
||||
rdmsrl(msr + offset, *buf++);
|
||||
size++;
|
||||
offset = find_next_bit(perf_ibs->offset_mask,
|
||||
perf_ibs->offset_max,
|
||||
offset + 1);
|
||||
} while (offset < offset_max);
|
||||
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
|
||||
/*
|
||||
* Read IbsBrTarget and IbsOpData4 separately
|
||||
* depending on their availability.
|
||||
* Can't add to offset_max as they are staggered
|
||||
*/
|
||||
if (ibs_caps & IBS_CAPS_BRNTRGT) {
|
||||
rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++);
|
||||
size++;
|
||||
}
|
||||
if (ibs_caps & IBS_CAPS_OPDATA4) {
|
||||
rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++);
|
||||
size++;
|
||||
}
|
||||
}
|
||||
ibs_data.size = sizeof(u64) * size;
|
||||
|
||||
regs = *iregs;
|
||||
if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
|
||||
regs.flags &= ~PERF_EFLAGS_EXACT;
|
||||
} else {
|
||||
set_linear_ip(®s, ibs_data.regs[1]);
|
||||
regs.flags |= PERF_EFLAGS_EXACT;
|
||||
}
|
||||
|
||||
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
|
||||
raw.size = sizeof(u32) + ibs_data.size;
|
||||
raw.data = ibs_data.data;
|
||||
data.raw = &raw;
|
||||
}
|
||||
|
||||
throttle = perf_event_overflow(event, &data, ®s);
|
||||
out:
|
||||
if (throttle)
|
||||
perf_ibs_disable_event(perf_ibs, hwc, *config);
|
||||
else
|
||||
perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
|
||||
|
||||
perf_event_update_userpage(event);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
|
||||
{
|
||||
int handled = 0;
|
||||
|
||||
handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
|
||||
handled += perf_ibs_handle_irq(&perf_ibs_op, regs);
|
||||
|
||||
if (handled)
|
||||
inc_irq_stat(apic_perf_irqs);
|
||||
|
||||
return handled;
|
||||
}
|
||||
NOKPROBE_SYMBOL(perf_ibs_nmi_handler);
|
||||
|
||||
static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
|
||||
{
|
||||
struct cpu_perf_ibs __percpu *pcpu;
|
||||
int ret;
|
||||
|
||||
pcpu = alloc_percpu(struct cpu_perf_ibs);
|
||||
if (!pcpu)
|
||||
return -ENOMEM;
|
||||
|
||||
perf_ibs->pcpu = pcpu;
|
||||
|
||||
/* register attributes */
|
||||
if (perf_ibs->format_attrs[0]) {
|
||||
memset(&perf_ibs->format_group, 0, sizeof(perf_ibs->format_group));
|
||||
perf_ibs->format_group.name = "format";
|
||||
perf_ibs->format_group.attrs = perf_ibs->format_attrs;
|
||||
|
||||
memset(&perf_ibs->attr_groups, 0, sizeof(perf_ibs->attr_groups));
|
||||
perf_ibs->attr_groups[0] = &perf_ibs->format_group;
|
||||
perf_ibs->pmu.attr_groups = perf_ibs->attr_groups;
|
||||
}
|
||||
|
||||
ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
|
||||
if (ret) {
|
||||
perf_ibs->pcpu = NULL;
|
||||
free_percpu(pcpu);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static __init int perf_event_ibs_init(void)
|
||||
{
|
||||
struct attribute **attr = ibs_op_format_attrs;
|
||||
|
||||
if (!ibs_caps)
|
||||
return -ENODEV; /* ibs not supported by the cpu */
|
||||
|
||||
perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
|
||||
|
||||
if (ibs_caps & IBS_CAPS_OPCNT) {
|
||||
perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
|
||||
*attr++ = &format_attr_cnt_ctl.attr;
|
||||
}
|
||||
perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
|
||||
|
||||
register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
|
||||
printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */
|
||||
|
||||
static __init int perf_event_ibs_init(void) { return 0; }
|
||||
|
||||
#endif
|
||||
|
||||
/* IBS - apic initialization, for perf and oprofile */
|
||||
|
||||
static __init u32 __get_ibs_caps(void)
|
||||
{
|
||||
u32 caps;
|
||||
unsigned int max_level;
|
||||
|
||||
if (!boot_cpu_has(X86_FEATURE_IBS))
|
||||
return 0;
|
||||
|
||||
/* check IBS cpuid feature flags */
|
||||
max_level = cpuid_eax(0x80000000);
|
||||
if (max_level < IBS_CPUID_FEATURES)
|
||||
return IBS_CAPS_DEFAULT;
|
||||
|
||||
caps = cpuid_eax(IBS_CPUID_FEATURES);
|
||||
if (!(caps & IBS_CAPS_AVAIL))
|
||||
/* cpuid flags not valid */
|
||||
return IBS_CAPS_DEFAULT;
|
||||
|
||||
return caps;
|
||||
}
|
||||
|
||||
u32 get_ibs_caps(void)
|
||||
{
|
||||
return ibs_caps;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(get_ibs_caps);
|
||||
|
||||
static inline int get_eilvt(int offset)
|
||||
{
|
||||
return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1);
|
||||
}
|
||||
|
||||
static inline int put_eilvt(int offset)
|
||||
{
|
||||
return !setup_APIC_eilvt(offset, 0, 0, 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check and reserve APIC extended interrupt LVT offset for IBS if available.
|
||||
*/
|
||||
static inline int ibs_eilvt_valid(void)
|
||||
{
|
||||
int offset;
|
||||
u64 val;
|
||||
int valid = 0;
|
||||
|
||||
preempt_disable();
|
||||
|
||||
rdmsrl(MSR_AMD64_IBSCTL, val);
|
||||
offset = val & IBSCTL_LVT_OFFSET_MASK;
|
||||
|
||||
if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
|
||||
pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
|
||||
smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!get_eilvt(offset)) {
|
||||
pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
|
||||
smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
|
||||
goto out;
|
||||
}
|
||||
|
||||
valid = 1;
|
||||
out:
|
||||
preempt_enable();
|
||||
|
||||
return valid;
|
||||
}
|
||||
|
||||
static int setup_ibs_ctl(int ibs_eilvt_off)
|
||||
{
|
||||
struct pci_dev *cpu_cfg;
|
||||
int nodes;
|
||||
u32 value = 0;
|
||||
|
||||
nodes = 0;
|
||||
cpu_cfg = NULL;
|
||||
do {
|
||||
cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD,
|
||||
PCI_DEVICE_ID_AMD_10H_NB_MISC,
|
||||
cpu_cfg);
|
||||
if (!cpu_cfg)
|
||||
break;
|
||||
++nodes;
|
||||
pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
|
||||
| IBSCTL_LVT_OFFSET_VALID);
|
||||
pci_read_config_dword(cpu_cfg, IBSCTL, &value);
|
||||
if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {
|
||||
pci_dev_put(cpu_cfg);
|
||||
printk(KERN_DEBUG "Failed to setup IBS LVT offset, "
|
||||
"IBSCTL = 0x%08x\n", value);
|
||||
return -EINVAL;
|
||||
}
|
||||
} while (1);
|
||||
|
||||
if (!nodes) {
|
||||
printk(KERN_DEBUG "No CPU node configured for IBS\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* This runs only on the current cpu. We try to find an LVT offset and
|
||||
* setup the local APIC. For this we must disable preemption. On
|
||||
* success we initialize all nodes with this offset. This then updates
|
||||
* the offset in the IBS_CTL per-node msr. The per-core APIC setup of
|
||||
* the IBS interrupt vector is handled by perf_ibs_cpu_notifier that
|
||||
* is using the new offset.
|
||||
*/
|
||||
static void force_ibs_eilvt_setup(void)
|
||||
{
|
||||
int offset;
|
||||
int ret;
|
||||
|
||||
preempt_disable();
|
||||
/* find the next free available EILVT entry, skip offset 0 */
|
||||
for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) {
|
||||
if (get_eilvt(offset))
|
||||
break;
|
||||
}
|
||||
preempt_enable();
|
||||
|
||||
if (offset == APIC_EILVT_NR_MAX) {
|
||||
printk(KERN_DEBUG "No EILVT entry available\n");
|
||||
return;
|
||||
}
|
||||
|
||||
ret = setup_ibs_ctl(offset);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (!ibs_eilvt_valid())
|
||||
goto out;
|
||||
|
||||
pr_info("IBS: LVT offset %d assigned\n", offset);
|
||||
|
||||
return;
|
||||
out:
|
||||
preempt_disable();
|
||||
put_eilvt(offset);
|
||||
preempt_enable();
|
||||
return;
|
||||
}
|
||||
|
||||
static void ibs_eilvt_setup(void)
|
||||
{
|
||||
/*
|
||||
* Force LVT offset assignment for family 10h: The offsets are
|
||||
* not assigned by the BIOS for this family, so the OS is
|
||||
* responsible for doing it. If the OS assignment fails, fall
|
||||
* back to the BIOS settings and try to use those.
|
||||
*/
|
||||
if (boot_cpu_data.x86 == 0x10)
|
||||
force_ibs_eilvt_setup();
|
||||
}
|
||||
|
||||
static inline int get_ibs_lvt_offset(void)
|
||||
{
|
||||
u64 val;
|
||||
|
||||
rdmsrl(MSR_AMD64_IBSCTL, val);
|
||||
if (!(val & IBSCTL_LVT_OFFSET_VALID))
|
||||
return -EINVAL;
|
||||
|
||||
return val & IBSCTL_LVT_OFFSET_MASK;
|
||||
}
|
||||
|
||||
static void setup_APIC_ibs(void *dummy)
|
||||
{
|
||||
int offset;
|
||||
|
||||
offset = get_ibs_lvt_offset();
|
||||
if (offset < 0)
|
||||
goto failed;
|
||||
|
||||
if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0))
|
||||
return;
|
||||
failed:
|
||||
pr_warn("perf: IBS APIC setup failed on cpu #%d\n",
|
||||
smp_processor_id());
|
||||
}
|
||||
|
||||
static void clear_APIC_ibs(void *dummy)
|
||||
{
|
||||
int offset;
|
||||
|
||||
offset = get_ibs_lvt_offset();
|
||||
if (offset >= 0)
|
||||
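/* reprogram the entry as fixed delivery and masked to disable IBS interrupt delivery on this cpu */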
setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PM
|
||||
|
||||
static int perf_ibs_suspend(void)
|
||||
{
|
||||
clear_APIC_ibs(NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void perf_ibs_resume(void)
|
||||
{
|
||||
ibs_eilvt_setup();
|
||||
setup_APIC_ibs(NULL);
|
||||
}
|
||||
|
||||
static struct syscore_ops perf_ibs_syscore_ops = {
|
||||
.resume = perf_ibs_resume,
|
||||
.suspend = perf_ibs_suspend,
|
||||
};
|
||||
|
||||
static void perf_ibs_pm_init(void)
|
||||
{
|
||||
register_syscore_ops(&perf_ibs_syscore_ops);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static inline void perf_ibs_pm_init(void) { }
|
||||
|
||||
#endif
|
||||
|
||||
static int
|
||||
perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
|
||||
{
|
||||
switch (action & ~CPU_TASKS_FROZEN) {
|
||||
case CPU_STARTING:
|
||||
setup_APIC_ibs(NULL);
|
||||
break;
|
||||
case CPU_DYING:
|
||||
clear_APIC_ibs(NULL);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
static __init int amd_ibs_init(void)
|
||||
{
|
||||
u32 caps;
|
||||
int ret = -EINVAL;
|
||||
|
||||
caps = __get_ibs_caps();
|
||||
if (!caps)
|
||||
return -ENODEV; /* ibs not supported by the cpu */
|
||||
|
||||
ibs_eilvt_setup();
|
||||
|
||||
if (!ibs_eilvt_valid())
|
||||
goto out;
|
||||
|
||||
perf_ibs_pm_init();
|
||||
cpu_notifier_register_begin();
|
||||
ibs_caps = caps;
|
||||
/* make ibs_caps visible to other cpus: */
|
||||
smp_mb();
|
||||
smp_call_function(setup_APIC_ibs, NULL, 1);
|
||||
__perf_cpu_notifier(perf_ibs_cpu_notifier);
|
||||
cpu_notifier_register_done();
|
||||
|
||||
ret = perf_event_ibs_init();
|
||||
out:
|
||||
if (ret)
|
||||
pr_err("Failed to setup IBS, %d\n", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Since we need the pci subsystem to init ibs we can't do this earlier: */
|
||||
device_initcall(amd_ibs_init);
|
@@ -1,499 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2013 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Author: Steven Kinney <Steven.Kinney@amd.com>
|
||||
* Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com>
|
||||
*
|
||||
* Perf: amd_iommu - AMD IOMMU Performance Counter PMU implementation
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
#include "perf_event.h"
|
||||
#include "perf_event_amd_iommu.h"
|
||||
|
||||
#define COUNTER_SHIFT 16
|
||||
|
||||
#define _GET_BANK(ev) ((u8)(ev->hw.extra_reg.reg >> 8))
|
||||
#define _GET_CNTR(ev) ((u8)(ev->hw.extra_reg.reg))
|
||||
|
||||
/* iommu pmu config masks */
|
||||
#define _GET_CSOURCE(ev) ((ev->hw.config & 0xFFULL))
|
||||
#define _GET_DEVID(ev) ((ev->hw.config >> 8) & 0xFFFFULL)
|
||||
#define _GET_PASID(ev) ((ev->hw.config >> 24) & 0xFFFFULL)
|
||||
#define _GET_DOMID(ev) ((ev->hw.config >> 40) & 0xFFFFULL)
|
||||
#define _GET_DEVID_MASK(ev) ((ev->hw.extra_reg.config) & 0xFFFFULL)
|
||||
#define _GET_PASID_MASK(ev) ((ev->hw.extra_reg.config >> 16) & 0xFFFFULL)
|
||||
#define _GET_DOMID_MASK(ev) ((ev->hw.extra_reg.config >> 32) & 0xFFFFULL)
|
||||
|
||||
static struct perf_amd_iommu __perf_iommu;
|
||||
|
||||
struct perf_amd_iommu {
|
||||
struct pmu pmu;
|
||||
u8 max_banks;
|
||||
u8 max_counters;
|
||||
u64 cntr_assign_mask;
|
||||
raw_spinlock_t lock;
|
||||
const struct attribute_group *attr_groups[4];
|
||||
};
|
||||
|
||||
#define format_group attr_groups[0]
|
||||
#define cpumask_group attr_groups[1]
|
||||
#define events_group attr_groups[2]
|
||||
#define null_group attr_groups[3]
|
||||
|
||||
/*---------------------------------------------
|
||||
* sysfs format attributes
|
||||
*---------------------------------------------*/
|
||||
PMU_FORMAT_ATTR(csource, "config:0-7");
|
||||
PMU_FORMAT_ATTR(devid, "config:8-23");
|
||||
PMU_FORMAT_ATTR(pasid, "config:24-39");
|
||||
PMU_FORMAT_ATTR(domid, "config:40-55");
|
||||
PMU_FORMAT_ATTR(devid_mask, "config1:0-15");
|
||||
PMU_FORMAT_ATTR(pasid_mask, "config1:16-31");
|
||||
PMU_FORMAT_ATTR(domid_mask, "config1:32-47");
|
||||
|
||||
static struct attribute *iommu_format_attrs[] = {
|
||||
&format_attr_csource.attr,
|
||||
&format_attr_devid.attr,
|
||||
&format_attr_pasid.attr,
|
||||
&format_attr_domid.attr,
|
||||
&format_attr_devid_mask.attr,
|
||||
&format_attr_pasid_mask.attr,
|
||||
&format_attr_domid_mask.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group amd_iommu_format_group = {
|
||||
.name = "format",
|
||||
.attrs = iommu_format_attrs,
|
||||
};
|
||||
|
||||
/*---------------------------------------------
|
||||
* sysfs events attributes
|
||||
*---------------------------------------------*/
|
||||
struct amd_iommu_event_desc {
|
||||
struct kobj_attribute attr;
|
||||
const char *event;
|
||||
};
|
||||
|
||||
static ssize_t _iommu_event_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
struct amd_iommu_event_desc *event =
|
||||
container_of(attr, struct amd_iommu_event_desc, attr);
|
||||
return sprintf(buf, "%s\n", event->event);
|
||||
}
|
||||
|
||||
#define AMD_IOMMU_EVENT_DESC(_name, _event) \
|
||||
{ \
|
||||
.attr = __ATTR(_name, 0444, _iommu_event_show, NULL), \
|
||||
.event = _event, \
|
||||
}
|
||||
|
||||
static struct amd_iommu_event_desc amd_iommu_v2_event_descs[] = {
|
||||
AMD_IOMMU_EVENT_DESC(mem_pass_untrans, "csource=0x01"),
|
||||
AMD_IOMMU_EVENT_DESC(mem_pass_pretrans, "csource=0x02"),
|
||||
AMD_IOMMU_EVENT_DESC(mem_pass_excl, "csource=0x03"),
|
||||
AMD_IOMMU_EVENT_DESC(mem_target_abort, "csource=0x04"),
|
||||
AMD_IOMMU_EVENT_DESC(mem_trans_total, "csource=0x05"),
|
||||
AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_hit, "csource=0x06"),
|
||||
AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_mis, "csource=0x07"),
|
||||
AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_hit, "csource=0x08"),
|
||||
AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_mis, "csource=0x09"),
|
||||
AMD_IOMMU_EVENT_DESC(mem_dte_hit, "csource=0x0a"),
|
||||
AMD_IOMMU_EVENT_DESC(mem_dte_mis, "csource=0x0b"),
|
||||
AMD_IOMMU_EVENT_DESC(page_tbl_read_tot, "csource=0x0c"),
|
||||
AMD_IOMMU_EVENT_DESC(page_tbl_read_nst, "csource=0x0d"),
|
||||
AMD_IOMMU_EVENT_DESC(page_tbl_read_gst, "csource=0x0e"),
|
||||
AMD_IOMMU_EVENT_DESC(int_dte_hit, "csource=0x0f"),
|
||||
AMD_IOMMU_EVENT_DESC(int_dte_mis, "csource=0x10"),
|
||||
AMD_IOMMU_EVENT_DESC(cmd_processed, "csource=0x11"),
|
||||
AMD_IOMMU_EVENT_DESC(cmd_processed_inv, "csource=0x12"),
|
||||
AMD_IOMMU_EVENT_DESC(tlb_inv, "csource=0x13"),
|
||||
{ /* end: all zeroes */ },
|
||||
};
|
||||
|
||||
/*---------------------------------------------
|
||||
* sysfs cpumask attributes
|
||||
*---------------------------------------------*/
|
||||
static cpumask_t iommu_cpumask;
|
||||
|
||||
static ssize_t _iommu_cpumask_show(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
return cpumap_print_to_pagebuf(true, buf, &iommu_cpumask);
|
||||
}
|
||||
static DEVICE_ATTR(cpumask, S_IRUGO, _iommu_cpumask_show, NULL);
|
||||
|
||||
static struct attribute *iommu_cpumask_attrs[] = {
|
||||
&dev_attr_cpumask.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group amd_iommu_cpumask_group = {
|
||||
.attrs = iommu_cpumask_attrs,
|
||||
};
|
||||
|
||||
/*---------------------------------------------*/
|
||||
|
||||
static int get_next_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu)
|
||||
{
|
||||
unsigned long flags;
|
||||
int shift, bank, cntr, retval;
|
||||
int max_banks = perf_iommu->max_banks;
|
||||
int max_cntrs = perf_iommu->max_counters;
|
||||
|
||||
raw_spin_lock_irqsave(&perf_iommu->lock, flags);
|
||||
|
||||
for (bank = 0, shift = 0; bank < max_banks; bank++) {
|
||||
for (cntr = 0; cntr < max_cntrs; cntr++) {
|
||||
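/* the assignment mask packs 4 counter bits per bank: shift == bank * 4 + cntr */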
shift = bank + (bank*3) + cntr;
|
||||
if (perf_iommu->cntr_assign_mask & (1ULL<<shift)) {
|
||||
continue;
|
||||
} else {
|
||||
perf_iommu->cntr_assign_mask |= (1ULL<<shift);
|
||||
retval = ((u16)((u16)bank<<8) | (u8)(cntr));
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
retval = -ENOSPC;
|
||||
out:
|
||||
raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
|
||||
return retval;
|
||||
}
|
||||
|
||||
static int clear_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu,
|
||||
u8 bank, u8 cntr)
|
||||
{
|
||||
unsigned long flags;
|
||||
int max_banks, max_cntrs;
|
||||
int shift = 0;
|
||||
|
||||
max_banks = perf_iommu->max_banks;
|
||||
max_cntrs = perf_iommu->max_counters;
|
||||
|
||||
if ((bank > max_banks) || (cntr > max_cntrs))
|
||||
return -EINVAL;
|
||||
|
||||
shift = bank + cntr + (bank*3);
|
||||
|
||||
raw_spin_lock_irqsave(&perf_iommu->lock, flags);
|
||||
perf_iommu->cntr_assign_mask &= ~(1ULL<<shift);
|
||||
raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int perf_iommu_event_init(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
struct perf_amd_iommu *perf_iommu;
|
||||
u64 config, config1;
|
||||
|
||||
/* check the event attr type for PMU enumeration */
|
||||
if (event->attr.type != event->pmu->type)
|
||||
return -ENOENT;
|
||||
|
||||
/*
|
||||
* IOMMU counters are shared across all cores.
|
||||
* Therefore, it does not support per-process mode.
|
||||
* Also, it does not support event sampling mode.
|
||||
*/
|
||||
if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
|
||||
return -EINVAL;
|
||||
|
||||
/* IOMMU counters do not have usr/os/guest/host bits */
|
||||
if (event->attr.exclude_user || event->attr.exclude_kernel ||
|
||||
event->attr.exclude_host || event->attr.exclude_guest)
|
||||
return -EINVAL;
|
||||
|
||||
if (event->cpu < 0)
|
||||
return -EINVAL;
|
||||
|
||||
perf_iommu = &__perf_iommu;
|
||||
|
||||
if (event->pmu != &perf_iommu->pmu)
|
||||
return -ENOENT;
|
||||
|
||||
if (perf_iommu) {
|
||||
config = event->attr.config;
|
||||
config1 = event->attr.config1;
|
||||
} else {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* integrate with iommu base devid (0000), assume one iommu */
|
||||
perf_iommu->max_banks =
|
||||
amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID);
|
||||
perf_iommu->max_counters =
|
||||
amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID);
|
||||
if ((perf_iommu->max_banks == 0) || (perf_iommu->max_counters == 0))
|
||||
return -EINVAL;
|
||||
|
||||
/* update the hw_perf_event struct with the iommu config data */
|
||||
hwc->config = config;
|
||||
hwc->extra_reg.config = config1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void perf_iommu_enable_event(struct perf_event *ev)
|
||||
{
|
||||
u8 csource = _GET_CSOURCE(ev);
|
||||
u16 devid = _GET_DEVID(ev);
|
||||
u64 reg = 0ULL;
|
||||
|
||||
reg = csource;
|
||||
amd_iommu_pc_get_set_reg_val(devid,
|
||||
_GET_BANK(ev), _GET_CNTR(ev) ,
|
||||
IOMMU_PC_COUNTER_SRC_REG, ®, true);
|
||||
|
||||
reg = 0ULL | devid | (_GET_DEVID_MASK(ev) << 32);
|
||||
if (reg)
|
||||
reg |= (1UL << 31);
|
||||
amd_iommu_pc_get_set_reg_val(devid,
|
||||
_GET_BANK(ev), _GET_CNTR(ev) ,
|
||||
IOMMU_PC_DEVID_MATCH_REG, ®, true);
|
||||
|
||||
reg = 0ULL | _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32);
|
||||
if (reg)
|
||||
reg |= (1UL << 31);
|
||||
amd_iommu_pc_get_set_reg_val(devid,
|
||||
_GET_BANK(ev), _GET_CNTR(ev) ,
|
||||
IOMMU_PC_PASID_MATCH_REG, ®, true);
|
||||
|
||||
reg = 0ULL | _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32);
|
||||
if (reg)
|
||||
reg |= (1UL << 31);
|
||||
amd_iommu_pc_get_set_reg_val(devid,
|
||||
_GET_BANK(ev), _GET_CNTR(ev) ,
|
||||
IOMMU_PC_DOMID_MATCH_REG, ®, true);
|
||||
}
|
||||
|
||||
static void perf_iommu_disable_event(struct perf_event *event)
|
||||
{
|
||||
u64 reg = 0ULL;
|
||||
|
||||
amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
|
||||
_GET_BANK(event), _GET_CNTR(event),
|
||||
IOMMU_PC_COUNTER_SRC_REG, ®, true);
|
||||
}
|
||||
|
||||
static void perf_iommu_start(struct perf_event *event, int flags)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
|
||||
pr_debug("perf: amd_iommu:perf_iommu_start\n");
|
||||
if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
|
||||
return;
|
||||
|
||||
WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
|
||||
hwc->state = 0;
|
||||
|
||||
if (flags & PERF_EF_RELOAD) {
|
||||
u64 prev_raw_count = local64_read(&hwc->prev_count);
|
||||
amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
|
||||
_GET_BANK(event), _GET_CNTR(event),
|
||||
IOMMU_PC_COUNTER_REG, &prev_raw_count, true);
|
||||
}
|
||||
|
||||
perf_iommu_enable_event(event);
|
||||
perf_event_update_userpage(event);
|
||||
|
||||
}
|
||||
|
||||
static void perf_iommu_read(struct perf_event *event)
|
||||
{
|
||||
u64 count = 0ULL;
|
||||
u64 prev_raw_count = 0ULL;
|
||||
u64 delta = 0ULL;
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
pr_debug("perf: amd_iommu:perf_iommu_read\n");
|
||||
|
||||
amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
|
||||
_GET_BANK(event), _GET_CNTR(event),
|
||||
IOMMU_PC_COUNTER_REG, &count, false);
|
||||
|
||||
/* IOMMU pc counter register is only 48 bits */
|
||||
count &= 0xFFFFFFFFFFFFULL;
|
||||
|
||||
prev_raw_count = local64_read(&hwc->prev_count);
|
||||
if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
|
||||
count) != prev_raw_count)
|
||||
return;
|
||||
|
||||
/* Handle 48-bit counter overflow */
|
||||
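/* the left shift discards bits above bit 47, so a wrapped counter still yields the correct delta after shifting back down */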
delta = (count << COUNTER_SHIFT) - (prev_raw_count << COUNTER_SHIFT);
|
||||
delta >>= COUNTER_SHIFT;
|
||||
local64_add(delta, &event->count);
|
||||
|
||||
}
|
||||
|
||||
static void perf_iommu_stop(struct perf_event *event, int flags)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
u64 config;
|
||||
|
||||
pr_debug("perf: amd_iommu:perf_iommu_stop\n");
|
||||
|
||||
if (hwc->state & PERF_HES_UPTODATE)
|
||||
return;
|
||||
|
||||
perf_iommu_disable_event(event);
|
||||
WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
|
||||
hwc->state |= PERF_HES_STOPPED;
|
||||
|
||||
if (hwc->state & PERF_HES_UPTODATE)
|
||||
return;
|
||||
|
||||
config = hwc->config;
|
||||
perf_iommu_read(event);
|
||||
hwc->state |= PERF_HES_UPTODATE;
|
||||
}
|
||||
|
||||
static int perf_iommu_add(struct perf_event *event, int flags)
|
||||
{
|
||||
int retval;
|
||||
struct perf_amd_iommu *perf_iommu =
|
||||
container_of(event->pmu, struct perf_amd_iommu, pmu);
|
||||
|
||||
pr_debug("perf: amd_iommu:perf_iommu_add\n");
|
||||
event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
|
||||
|
||||
/* request an iommu bank/counter */
|
||||
retval = get_next_avail_iommu_bnk_cntr(perf_iommu);
|
||||
if (retval != -ENOSPC)
|
||||
event->hw.extra_reg.reg = (u16)retval;
|
||||
else
|
||||
return retval;
|
||||
|
||||
if (flags & PERF_EF_START)
|
||||
perf_iommu_start(event, PERF_EF_RELOAD);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void perf_iommu_del(struct perf_event *event, int flags)
|
||||
{
|
||||
struct perf_amd_iommu *perf_iommu =
|
||||
container_of(event->pmu, struct perf_amd_iommu, pmu);
|
||||
|
||||
pr_debug("perf: amd_iommu:perf_iommu_del\n");
|
||||
perf_iommu_stop(event, PERF_EF_UPDATE);
|
||||
|
||||
/* clear the assigned iommu bank/counter */
|
||||
clear_avail_iommu_bnk_cntr(perf_iommu,
|
||||
_GET_BANK(event),
|
||||
_GET_CNTR(event));
|
||||
|
||||
perf_event_update_userpage(event);
|
||||
}
|
||||
|
||||
static __init int _init_events_attrs(struct perf_amd_iommu *perf_iommu)
|
||||
{
|
||||
struct attribute **attrs;
|
||||
struct attribute_group *attr_group;
|
||||
int i = 0, j;
|
||||
|
||||
while (amd_iommu_v2_event_descs[i].attr.attr.name)
|
||||
i++;
|
||||
|
||||
attr_group = kzalloc(sizeof(struct attribute *)
|
||||
* (i + 1) + sizeof(*attr_group), GFP_KERNEL);
|
||||
if (!attr_group)
|
||||
return -ENOMEM;
|
||||
|
||||
attrs = (struct attribute **)(attr_group + 1);
|
||||
for (j = 0; j < i; j++)
|
||||
attrs[j] = &amd_iommu_v2_event_descs[j].attr.attr;
|
||||
|
||||
attr_group->name = "events";
|
||||
attr_group->attrs = attrs;
|
||||
perf_iommu->events_group = attr_group;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static __init void amd_iommu_pc_exit(void)
|
||||
{
|
||||
if (__perf_iommu.events_group != NULL) {
|
||||
kfree(__perf_iommu.events_group);
|
||||
__perf_iommu.events_group = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static __init int _init_perf_amd_iommu(
|
||||
struct perf_amd_iommu *perf_iommu, char *name)
|
||||
{
|
||||
int ret;
|
||||
|
||||
raw_spin_lock_init(&perf_iommu->lock);
|
||||
|
||||
/* Init format attributes */
|
||||
perf_iommu->format_group = &amd_iommu_format_group;
|
||||
|
||||
/* Init cpumask attributes to only core 0 */
|
||||
cpumask_set_cpu(0, &iommu_cpumask);
|
||||
perf_iommu->cpumask_group = &amd_iommu_cpumask_group;
|
||||
|
||||
/* Init events attributes */
|
||||
if (_init_events_attrs(perf_iommu) != 0)
|
||||
pr_err("perf: amd_iommu: Only support raw events.\n");
|
||||
|
||||
/* Init null attributes */
|
||||
perf_iommu->null_group = NULL;
|
||||
perf_iommu->pmu.attr_groups = perf_iommu->attr_groups;
|
||||
|
||||
ret = perf_pmu_register(&perf_iommu->pmu, name, -1);
|
||||
if (ret) {
|
||||
pr_err("perf: amd_iommu: Failed to initialized.\n");
|
||||
amd_iommu_pc_exit();
|
||||
} else {
|
||||
pr_info("perf: amd_iommu: Detected. (%d banks, %d counters/bank)\n",
|
||||
amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID),
|
||||
amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID));
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct perf_amd_iommu __perf_iommu = {
|
||||
.pmu = {
|
||||
.event_init = perf_iommu_event_init,
|
||||
.add = perf_iommu_add,
|
||||
.del = perf_iommu_del,
|
||||
.start = perf_iommu_start,
|
||||
.stop = perf_iommu_stop,
|
||||
.read = perf_iommu_read,
|
||||
},
|
||||
.max_banks = 0x00,
|
||||
.max_counters = 0x00,
|
||||
.cntr_assign_mask = 0ULL,
|
||||
.format_group = NULL,
|
||||
.cpumask_group = NULL,
|
||||
.events_group = NULL,
|
||||
.null_group = NULL,
|
||||
};
|
||||
|
||||
static __init int amd_iommu_pc_init(void)
|
||||
{
|
||||
/* Make sure the IOMMU PC resource is available */
|
||||
if (!amd_iommu_pc_supported())
|
||||
return -ENODEV;
|
||||
|
||||
_init_perf_amd_iommu(&__perf_iommu, "amd_iommu");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
device_initcall(amd_iommu_pc_init);
|
@@ -1,40 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2013 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Author: Steven Kinney <Steven.Kinney@amd.com>
|
||||
* Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#ifndef _PERF_EVENT_AMD_IOMMU_H_
|
||||
#define _PERF_EVENT_AMD_IOMMU_H_
|
||||
|
||||
/* iommu pc mmio region register indexes */
|
||||
#define IOMMU_PC_COUNTER_REG 0x00
|
||||
#define IOMMU_PC_COUNTER_SRC_REG 0x08
|
||||
#define IOMMU_PC_PASID_MATCH_REG 0x10
|
||||
#define IOMMU_PC_DOMID_MATCH_REG 0x18
|
||||
#define IOMMU_PC_DEVID_MATCH_REG 0x20
|
||||
#define IOMMU_PC_COUNTER_REPORT_REG 0x28
|
||||
|
||||
/* maximum specified banks/counters */
|
||||
#define PC_MAX_SPEC_BNKS 64
|
||||
#define PC_MAX_SPEC_CNTRS 16
|
||||
|
||||
/* iommu pc reg masks */
|
||||
#define IOMMU_BASE_DEVID 0x0000
|
||||
|
||||
/* amd_iommu_init.c external support functions */
|
||||
extern bool amd_iommu_pc_supported(void);
|
||||
|
||||
extern u8 amd_iommu_pc_get_max_banks(u16 devid);
|
||||
|
||||
extern u8 amd_iommu_pc_get_max_counters(u16 devid);
|
||||
|
||||
extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr,
|
||||
u8 fxn, u64 *value, bool is_write);
|
||||
|
||||
#endif /*_PERF_EVENT_AMD_IOMMU_H_*/
|
@@ -1,603 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2013 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Author: Jacob Shin <jacob.shin@amd.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/cpumask.h>
|
||||
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/perf_event.h>
|
||||
#include <asm/msr.h>
|
||||
|
||||
#define NUM_COUNTERS_NB 4
|
||||
#define NUM_COUNTERS_L2 4
|
||||
#define MAX_COUNTERS NUM_COUNTERS_NB
|
||||
|
||||
#define RDPMC_BASE_NB 6
|
||||
#define RDPMC_BASE_L2 10
|
||||
|
||||
#define COUNTER_SHIFT 16
|
||||
|
||||
struct amd_uncore {
|
||||
int id;
|
||||
int refcnt;
|
||||
int cpu;
|
||||
int num_counters;
|
||||
int rdpmc_base;
|
||||
u32 msr_base;
|
||||
cpumask_t *active_mask;
|
||||
struct pmu *pmu;
|
||||
struct perf_event *events[MAX_COUNTERS];
|
||||
struct amd_uncore *free_when_cpu_online;
|
||||
};
|
||||
|
||||
static struct amd_uncore * __percpu *amd_uncore_nb;
|
||||
static struct amd_uncore * __percpu *amd_uncore_l2;
|
||||
|
||||
static struct pmu amd_nb_pmu;
|
||||
static struct pmu amd_l2_pmu;
|
||||
|
||||
static cpumask_t amd_nb_active_mask;
|
||||
static cpumask_t amd_l2_active_mask;
|
||||
|
||||
static bool is_nb_event(struct perf_event *event)
|
||||
{
|
||||
return event->pmu->type == amd_nb_pmu.type;
|
||||
}
|
||||
|
||||
static bool is_l2_event(struct perf_event *event)
|
||||
{
|
||||
return event->pmu->type == amd_l2_pmu.type;
|
||||
}
|
||||
|
||||
static struct amd_uncore *event_to_amd_uncore(struct perf_event *event)
|
||||
{
|
||||
if (is_nb_event(event) && amd_uncore_nb)
|
||||
return *per_cpu_ptr(amd_uncore_nb, event->cpu);
|
||||
else if (is_l2_event(event) && amd_uncore_l2)
|
||||
return *per_cpu_ptr(amd_uncore_l2, event->cpu);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void amd_uncore_read(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
u64 prev, new;
|
||||
s64 delta;
|
||||
|
||||
/*
|
||||
* since we do not enable counter overflow interrupts,
|
||||
* we do not have to worry about prev_count changing on us
|
||||
*/
|
||||
|
||||
prev = local64_read(&hwc->prev_count);
|
||||
rdpmcl(hwc->event_base_rdpmc, new);
|
||||
local64_set(&hwc->prev_count, new);
|
||||
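/* NB/L2 counters are 48 bits wide; shifting up by 16 and back down folds any wraparound out of the delta */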
delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
|
||||
delta >>= COUNTER_SHIFT;
|
||||
local64_add(delta, &event->count);
|
||||
}
|
||||
|
||||
static void amd_uncore_start(struct perf_event *event, int flags)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
|
||||
if (flags & PERF_EF_RELOAD)
|
||||
wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));
|
||||
|
||||
hwc->state = 0;
|
||||
wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
|
||||
perf_event_update_userpage(event);
|
||||
}
|
||||
|
||||
static void amd_uncore_stop(struct perf_event *event, int flags)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
|
||||
wrmsrl(hwc->config_base, hwc->config);
|
||||
hwc->state |= PERF_HES_STOPPED;
|
||||
|
||||
if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
|
||||
amd_uncore_read(event);
|
||||
hwc->state |= PERF_HES_UPTODATE;
|
||||
}
|
||||
}
|
||||
|
||||
static int amd_uncore_add(struct perf_event *event, int flags)
|
||||
{
|
||||
int i;
|
||||
struct amd_uncore *uncore = event_to_amd_uncore(event);
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
|
||||
/* are we already assigned? */
|
||||
if (hwc->idx != -1 && uncore->events[hwc->idx] == event)
|
||||
goto out;
|
||||
|
||||
for (i = 0; i < uncore->num_counters; i++) {
|
||||
if (uncore->events[i] == event) {
|
||||
hwc->idx = i;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
/* if not, take the first available counter */
|
||||
hwc->idx = -1;
|
||||
for (i = 0; i < uncore->num_counters; i++) {
|
||||
if (cmpxchg(&uncore->events[i], NULL, event) == NULL) {
|
||||
hwc->idx = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
if (hwc->idx == -1)
|
||||
return -EBUSY;
|
||||
|
||||
hwc->config_base = uncore->msr_base + (2 * hwc->idx);
|
||||
hwc->event_base = uncore->msr_base + 1 + (2 * hwc->idx);
|
||||
hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx;
|
||||
hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
|
||||
|
||||
if (flags & PERF_EF_START)
|
||||
amd_uncore_start(event, PERF_EF_RELOAD);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void amd_uncore_del(struct perf_event *event, int flags)
|
||||
{
|
||||
int i;
|
||||
struct amd_uncore *uncore = event_to_amd_uncore(event);
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
|
||||
amd_uncore_stop(event, PERF_EF_UPDATE);
|
||||
|
||||
for (i = 0; i < uncore->num_counters; i++) {
|
||||
if (cmpxchg(&uncore->events[i], event, NULL) == event)
|
||||
break;
|
||||
}
|
||||
|
||||
hwc->idx = -1;
|
||||
}
|
||||
|
||||
static int amd_uncore_event_init(struct perf_event *event)
|
||||
{
|
||||
struct amd_uncore *uncore;
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
|
||||
if (event->attr.type != event->pmu->type)
|
||||
return -ENOENT;
|
||||
|
||||
/*
|
||||
* NB and L2 counters (MSRs) are shared across all cores that share the
|
||||
* same NB / L2 cache. Interrupts can be directed to a single target
|
||||
* core, however, event counts generated by processes running on other
|
||||
* cores cannot be masked out. So we do not support sampling and
|
||||
* per-thread events.
|
||||
*/
|
||||
if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
|
||||
return -EINVAL;
|
||||
|
||||
/* NB and L2 counters do not have usr/os/guest/host bits */
|
||||
if (event->attr.exclude_user || event->attr.exclude_kernel ||
|
||||
event->attr.exclude_host || event->attr.exclude_guest)
|
||||
return -EINVAL;
|
||||
|
||||
/* and we do not enable counter overflow interrupts */
|
||||
hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
|
||||
hwc->idx = -1;
|
||||
|
||||
if (event->cpu < 0)
|
||||
return -EINVAL;
|
||||
|
||||
uncore = event_to_amd_uncore(event);
|
||||
if (!uncore)
|
||||
return -ENODEV;
|
||||
|
||||
/*
|
||||
* since requests can come in on any of the shared cores, we remap
|
||||
* to a single common cpu.
|
||||
*/
|
||||
event->cpu = uncore->cpu;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
cpumask_t *active_mask;
|
||||
struct pmu *pmu = dev_get_drvdata(dev);
|
||||
|
||||
if (pmu->type == amd_nb_pmu.type)
|
||||
active_mask = &amd_nb_active_mask;
|
||||
else if (pmu->type == amd_l2_pmu.type)
|
||||
active_mask = &amd_l2_active_mask;
|
||||
else
|
||||
return 0;
|
||||
|
||||
return cpumap_print_to_pagebuf(true, buf, active_mask);
|
||||
}
|
||||
static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL);
|
||||
|
||||
static struct attribute *amd_uncore_attrs[] = {
|
||||
&dev_attr_cpumask.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group amd_uncore_attr_group = {
|
||||
.attrs = amd_uncore_attrs,
|
||||
};
|
||||
|
||||
PMU_FORMAT_ATTR(event, "config:0-7,32-35");
|
||||
PMU_FORMAT_ATTR(umask, "config:8-15");
|
||||
|
||||
static struct attribute *amd_uncore_format_attr[] = {
|
||||
&format_attr_event.attr,
|
||||
&format_attr_umask.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group amd_uncore_format_group = {
|
||||
.name = "format",
|
||||
.attrs = amd_uncore_format_attr,
|
||||
};
|
||||
|
||||
static const struct attribute_group *amd_uncore_attr_groups[] = {
|
||||
&amd_uncore_attr_group,
|
||||
&amd_uncore_format_group,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct pmu amd_nb_pmu = {
|
||||
.attr_groups = amd_uncore_attr_groups,
|
||||
.name = "amd_nb",
|
||||
.event_init = amd_uncore_event_init,
|
||||
.add = amd_uncore_add,
|
||||
.del = amd_uncore_del,
|
||||
.start = amd_uncore_start,
|
||||
.stop = amd_uncore_stop,
|
||||
.read = amd_uncore_read,
|
||||
};
|
||||
|
||||
static struct pmu amd_l2_pmu = {
|
||||
.attr_groups = amd_uncore_attr_groups,
|
||||
.name = "amd_l2",
|
||||
.event_init = amd_uncore_event_init,
|
||||
.add = amd_uncore_add,
|
||||
.del = amd_uncore_del,
|
||||
.start = amd_uncore_start,
|
||||
.stop = amd_uncore_stop,
|
||||
.read = amd_uncore_read,
|
||||
};
|
||||
|
||||
static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
|
||||
{
|
||||
return kzalloc_node(sizeof(struct amd_uncore), GFP_KERNEL,
|
||||
cpu_to_node(cpu));
|
||||
}
|
||||
|
||||
static int amd_uncore_cpu_up_prepare(unsigned int cpu)
|
||||
{
|
||||
struct amd_uncore *uncore_nb = NULL, *uncore_l2;
|
||||
|
||||
if (amd_uncore_nb) {
|
||||
uncore_nb = amd_uncore_alloc(cpu);
|
||||
if (!uncore_nb)
|
||||
goto fail;
|
||||
uncore_nb->cpu = cpu;
|
||||
uncore_nb->num_counters = NUM_COUNTERS_NB;
|
||||
uncore_nb->rdpmc_base = RDPMC_BASE_NB;
|
||||
uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL;
|
||||
uncore_nb->active_mask = &amd_nb_active_mask;
|
||||
uncore_nb->pmu = &amd_nb_pmu;
|
||||
*per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb;
|
||||
}
|
||||
|
||||
if (amd_uncore_l2) {
|
||||
uncore_l2 = amd_uncore_alloc(cpu);
|
||||
if (!uncore_l2)
|
||||
goto fail;
|
||||
uncore_l2->cpu = cpu;
|
||||
uncore_l2->num_counters = NUM_COUNTERS_L2;
|
||||
uncore_l2->rdpmc_base = RDPMC_BASE_L2;
|
||||
uncore_l2->msr_base = MSR_F16H_L2I_PERF_CTL;
|
||||
uncore_l2->active_mask = &amd_l2_active_mask;
|
||||
uncore_l2->pmu = &amd_l2_pmu;
|
||||
*per_cpu_ptr(amd_uncore_l2, cpu) = uncore_l2;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
if (amd_uncore_nb)
|
||||
*per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
|
||||
kfree(uncore_nb);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static struct amd_uncore *
|
||||
amd_uncore_find_online_sibling(struct amd_uncore *this,
|
||||
struct amd_uncore * __percpu *uncores)
|
||||
{
|
||||
unsigned int cpu;
|
||||
struct amd_uncore *that;
|
||||
|
||||
for_each_online_cpu(cpu) {
|
||||
that = *per_cpu_ptr(uncores, cpu);
|
||||
|
||||
if (!that)
|
||||
continue;
|
||||
|
||||
if (this == that)
|
||||
continue;
|
||||
|
||||
if (this->id == that->id) {
|
||||
that->free_when_cpu_online = this;
|
||||
this = that;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
this->refcnt++;
|
||||
return this;
|
||||
}
|
||||
|
||||
static void amd_uncore_cpu_starting(unsigned int cpu)
|
||||
{
|
||||
unsigned int eax, ebx, ecx, edx;
|
||||
struct amd_uncore *uncore;
|
||||
|
||||
if (amd_uncore_nb) {
|
||||
uncore = *per_cpu_ptr(amd_uncore_nb, cpu);
|
||||
cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
|
||||
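/* CPUID leaf 0x8000001e: ECX[7:0] is the node id; cores on one node share the NB counters */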
uncore->id = ecx & 0xff;
|
||||
|
||||
uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_nb);
|
||||
*per_cpu_ptr(amd_uncore_nb, cpu) = uncore;
|
||||
}
|
||||
|
||||
if (amd_uncore_l2) {
|
||||
unsigned int apicid = cpu_data(cpu).apicid;
|
||||
unsigned int nshared;
|
||||
|
||||
uncore = *per_cpu_ptr(amd_uncore_l2, cpu);
|
||||
cpuid_count(0x8000001d, 2, &eax, &ebx, &ecx, &edx);
|
||||
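/* CPUID leaf 0x8000001d, index 2 (the L2): EAX[25:14] is the number of cores sharing the cache, minus one */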
nshared = ((eax >> 14) & 0xfff) + 1;
|
||||
uncore->id = apicid - (apicid % nshared);
|
||||
|
||||
uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_l2);
|
||||
*per_cpu_ptr(amd_uncore_l2, cpu) = uncore;
|
||||
}
|
||||
}
|
||||
|
||||
static void uncore_online(unsigned int cpu,
|
||||
struct amd_uncore * __percpu *uncores)
|
||||
{
|
||||
struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
|
||||
|
||||
kfree(uncore->free_when_cpu_online);
|
||||
uncore->free_when_cpu_online = NULL;
|
||||
|
||||
if (cpu == uncore->cpu)
|
||||
cpumask_set_cpu(cpu, uncore->active_mask);
|
||||
}
|
||||
|
||||
static void amd_uncore_cpu_online(unsigned int cpu)
|
||||
{
|
||||
if (amd_uncore_nb)
|
||||
uncore_online(cpu, amd_uncore_nb);
|
||||
|
||||
if (amd_uncore_l2)
|
||||
uncore_online(cpu, amd_uncore_l2);
|
||||
}
|
||||
|
||||
static void uncore_down_prepare(unsigned int cpu,
|
||||
struct amd_uncore * __percpu *uncores)
|
||||
{
|
||||
unsigned int i;
|
||||
struct amd_uncore *this = *per_cpu_ptr(uncores, cpu);
|
||||
|
||||
if (this->cpu != cpu)
|
||||
return;
|
||||
|
||||
/* this cpu is going down, migrate to a shared sibling if possible */
|
||||
for_each_online_cpu(i) {
|
||||
struct amd_uncore *that = *per_cpu_ptr(uncores, i);
|
||||
|
||||
if (cpu == i)
|
||||
continue;
|
||||
|
||||
if (this == that) {
|
||||
perf_pmu_migrate_context(this->pmu, cpu, i);
|
||||
cpumask_clear_cpu(cpu, that->active_mask);
|
||||
cpumask_set_cpu(i, that->active_mask);
|
||||
that->cpu = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void amd_uncore_cpu_down_prepare(unsigned int cpu)
|
||||
{
|
||||
if (amd_uncore_nb)
|
||||
uncore_down_prepare(cpu, amd_uncore_nb);
|
||||
|
||||
if (amd_uncore_l2)
|
||||
uncore_down_prepare(cpu, amd_uncore_l2);
|
||||
}
|
||||
|
||||
static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores)
|
||||
{
|
||||
struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
|
||||
|
||||
if (cpu == uncore->cpu)
|
||||
cpumask_clear_cpu(cpu, uncore->active_mask);
|
||||
|
||||
if (!--uncore->refcnt)
|
||||
kfree(uncore);
|
||||
*per_cpu_ptr(uncores, cpu) = NULL;
|
||||
}
|
||||
|
||||
static void amd_uncore_cpu_dead(unsigned int cpu)
|
||||
{
|
||||
if (amd_uncore_nb)
|
||||
uncore_dead(cpu, amd_uncore_nb);
|
||||
|
||||
if (amd_uncore_l2)
|
||||
uncore_dead(cpu, amd_uncore_l2);
|
||||
}
|
||||
|
||||
static int
|
||||
amd_uncore_cpu_notifier(struct notifier_block *self, unsigned long action,
|
||||
void *hcpu)
|
||||
{
|
||||
unsigned int cpu = (long)hcpu;
|
||||
|
||||
switch (action & ~CPU_TASKS_FROZEN) {
|
||||
case CPU_UP_PREPARE:
|
||||
if (amd_uncore_cpu_up_prepare(cpu))
|
||||
return notifier_from_errno(-ENOMEM);
|
||||
break;
|
||||
|
||||
case CPU_STARTING:
|
||||
amd_uncore_cpu_starting(cpu);
|
||||
break;
|
||||
|
||||
case CPU_ONLINE:
|
||||
amd_uncore_cpu_online(cpu);
|
||||
break;
|
||||
|
||||
case CPU_DOWN_PREPARE:
|
||||
amd_uncore_cpu_down_prepare(cpu);
|
||||
break;
|
||||
|
||||
case CPU_UP_CANCELED:
|
||||
case CPU_DEAD:
|
||||
amd_uncore_cpu_dead(cpu);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
static struct notifier_block amd_uncore_cpu_notifier_block = {
|
||||
.notifier_call = amd_uncore_cpu_notifier,
|
||||
.priority = CPU_PRI_PERF + 1,
|
||||
};
|
||||
|
||||
static void __init init_cpu_already_online(void *dummy)
|
||||
{
|
||||
unsigned int cpu = smp_processor_id();
|
||||
|
||||
amd_uncore_cpu_starting(cpu);
|
||||
amd_uncore_cpu_online(cpu);
|
||||
}
|
||||
|
||||
static void cleanup_cpu_online(void *dummy)
|
||||
{
|
||||
unsigned int cpu = smp_processor_id();
|
||||
|
||||
amd_uncore_cpu_dead(cpu);
|
||||
}
|
||||
|
||||
static int __init amd_uncore_init(void)
|
||||
{
|
||||
unsigned int cpu, cpu2;
|
||||
int ret = -ENODEV;
|
||||
|
||||
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
|
||||
goto fail_nodev;
|
||||
|
||||
if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
|
||||
goto fail_nodev;
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
|
||||
amd_uncore_nb = alloc_percpu(struct amd_uncore *);
|
||||
if (!amd_uncore_nb) {
|
||||
ret = -ENOMEM;
|
||||
goto fail_nb;
|
||||
}
|
||||
ret = perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1);
|
||||
if (ret)
|
||||
goto fail_nb;
|
||||
|
||||
printk(KERN_INFO "perf: AMD NB counters detected\n");
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_PERFCTR_L2)) {
|
||||
amd_uncore_l2 = alloc_percpu(struct amd_uncore *);
|
||||
if (!amd_uncore_l2) {
|
||||
ret = -ENOMEM;
|
||||
goto fail_l2;
|
||||
}
|
||||
ret = perf_pmu_register(&amd_l2_pmu, amd_l2_pmu.name, -1);
|
||||
if (ret)
|
||||
goto fail_l2;
|
||||
|
||||
printk(KERN_INFO "perf: AMD L2I counters detected\n");
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
if (ret)
|
||||
goto fail_nodev;
|
||||
|
||||
cpu_notifier_register_begin();
|
||||
|
||||
/* init cpus already online before registering for hotplug notifier */
|
||||
for_each_online_cpu(cpu) {
|
||||
ret = amd_uncore_cpu_up_prepare(cpu);
|
||||
if (ret)
|
||||
goto fail_online;
|
||||
smp_call_function_single(cpu, init_cpu_already_online, NULL, 1);
|
||||
}
|
||||
|
||||
__register_cpu_notifier(&amd_uncore_cpu_notifier_block);
|
||||
cpu_notifier_register_done();
|
||||
|
||||
return 0;
|
||||
|
||||
|
||||
fail_online:
|
||||
for_each_online_cpu(cpu2) {
|
||||
if (cpu2 == cpu)
|
||||
break;
|
||||
smp_call_function_single(cpu2, cleanup_cpu_online, NULL, 1);
|
||||
}
|
||||
cpu_notifier_register_done();
|
||||
|
||||
/* amd_uncore_nb/l2 should have been freed by cleanup_cpu_online */
|
||||
amd_uncore_nb = amd_uncore_l2 = NULL;
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_PERFCTR_L2))
|
||||
perf_pmu_unregister(&amd_l2_pmu);
|
||||
fail_l2:
|
||||
if (boot_cpu_has(X86_FEATURE_PERFCTR_NB))
|
||||
perf_pmu_unregister(&amd_nb_pmu);
|
||||
if (amd_uncore_l2)
|
||||
free_percpu(amd_uncore_l2);
|
||||
fail_nb:
|
||||
if (amd_uncore_nb)
|
||||
free_percpu(amd_uncore_nb);
|
||||
|
||||
fail_nodev:
|
||||
return ret;
|
||||
}
|
||||
device_initcall(amd_uncore_init);
|
@@ -1,544 +0,0 @@
|
||||
/*
|
||||
* BTS PMU driver for perf
|
||||
* Copyright (c) 2013-2014, Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*/
|
||||
|
||||
#undef DEBUG
|
||||
|
||||
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/coredump.h>
|
||||
|
||||
#include <asm-generic/sizes.h>
|
||||
#include <asm/perf_event.h>
|
||||
|
||||
#include "perf_event.h"
|
||||
|
||||
struct bts_ctx {
|
||||
struct perf_output_handle handle;
|
||||
struct debug_store ds_back;
|
||||
int started;
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU(struct bts_ctx, bts_ctx);
|
||||
|
||||
#define BTS_RECORD_SIZE 24
|
||||
#define BTS_SAFETY_MARGIN 4080
|
||||
|
||||
struct bts_phys {
|
||||
struct page *page;
|
||||
unsigned long size;
|
||||
unsigned long offset;
|
||||
unsigned long displacement;
|
||||
};
|
||||
|
||||
struct bts_buffer {
|
||||
size_t real_size; /* multiple of BTS_RECORD_SIZE */
|
||||
unsigned int nr_pages;
|
||||
unsigned int nr_bufs;
|
||||
unsigned int cur_buf;
|
||||
bool snapshot;
|
||||
local_t data_size;
|
||||
local_t lost;
|
||||
local_t head;
|
||||
unsigned long end;
|
||||
void **data_pages;
|
||||
struct bts_phys buf[0];
|
||||
};
|
||||
|
||||
struct pmu bts_pmu;
|
||||
|
||||
static size_t buf_size(struct page *page)
|
||||
{
|
||||
return 1 << (PAGE_SHIFT + page_private(page));
|
||||
}
|
||||
|
||||
static void *
|
||||
bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite)
|
||||
{
|
||||
struct bts_buffer *buf;
|
||||
struct page *page;
|
||||
int node = (cpu == -1) ? cpu : cpu_to_node(cpu);
|
||||
unsigned long offset;
|
||||
size_t size = nr_pages << PAGE_SHIFT;
|
||||
int pg, nbuf, pad;
|
||||
|
||||
/* count all the high order buffers */
|
||||
for (pg = 0, nbuf = 0; pg < nr_pages;) {
|
||||
page = virt_to_page(pages[pg]);
|
||||
if (WARN_ON_ONCE(!PagePrivate(page) && nr_pages > 1))
|
||||
return NULL;
|
||||
pg += 1 << page_private(page);
|
||||
nbuf++;
|
||||
}
|
||||
|
||||
/*
|
||||
* to avoid interrupts in overwrite mode, only allow one physical buffer
|
||||
*/
|
||||
if (overwrite && nbuf > 1)
|
||||
return NULL;
|
||||
|
||||
buf = kzalloc_node(offsetof(struct bts_buffer, buf[nbuf]), GFP_KERNEL, node);
|
||||
if (!buf)
|
||||
return NULL;
|
||||
|
||||
buf->nr_pages = nr_pages;
|
||||
buf->nr_bufs = nbuf;
|
||||
buf->snapshot = overwrite;
|
||||
buf->data_pages = pages;
|
||||
buf->real_size = size - size % BTS_RECORD_SIZE;
|
||||
|
||||
for (pg = 0, nbuf = 0, offset = 0, pad = 0; nbuf < buf->nr_bufs; nbuf++) {
|
||||
unsigned int __nr_pages;
|
||||
|
||||
page = virt_to_page(pages[pg]);
|
||||
__nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1;
|
||||
buf->buf[nbuf].page = page;
|
||||
buf->buf[nbuf].offset = offset;
|
||||
buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0);
|
||||
buf->buf[nbuf].size = buf_size(page) - buf->buf[nbuf].displacement;
|
||||
pad = buf->buf[nbuf].size % BTS_RECORD_SIZE;
|
||||
buf->buf[nbuf].size -= pad;
|
||||
|
||||
pg += __nr_pages;
|
||||
offset += __nr_pages << PAGE_SHIFT;
|
||||
}
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
static void bts_buffer_free_aux(void *data)
|
||||
{
|
||||
kfree(data);
|
||||
}
|
||||
|
||||
static unsigned long bts_buffer_offset(struct bts_buffer *buf, unsigned int idx)
|
||||
{
|
||||
return buf->buf[idx].offset + buf->buf[idx].displacement;
|
||||
}
|
||||
|
||||
static void
|
||||
bts_config_buffer(struct bts_buffer *buf)
|
||||
{
|
||||
int cpu = raw_smp_processor_id();
|
||||
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
|
||||
struct bts_phys *phys = &buf->buf[buf->cur_buf];
|
||||
unsigned long index, thresh = 0, end = phys->size;
|
||||
struct page *page = phys->page;
|
||||
|
||||
index = local_read(&buf->head);
|
||||
|
||||
if (!buf->snapshot) {
|
||||
if (buf->end < phys->offset + buf_size(page))
|
||||
end = buf->end - phys->offset - phys->displacement;
|
||||
|
||||
index -= phys->offset + phys->displacement;
|
||||
|
||||
if (end - index > BTS_SAFETY_MARGIN)
|
||||
thresh = end - BTS_SAFETY_MARGIN;
|
||||
else if (end - index > BTS_RECORD_SIZE)
|
||||
thresh = end - BTS_RECORD_SIZE;
|
||||
else
|
||||
thresh = end;
|
||||
}
|
||||
|
||||
ds->bts_buffer_base = (u64)(long)page_address(page) + phys->displacement;
|
||||
ds->bts_index = ds->bts_buffer_base + index;
|
||||
ds->bts_absolute_maximum = ds->bts_buffer_base + end;
|
||||
ds->bts_interrupt_threshold = !buf->snapshot
|
||||
? ds->bts_buffer_base + thresh
|
||||
: ds->bts_absolute_maximum + BTS_RECORD_SIZE;
|
||||
}
|
||||
|
||||
static void bts_buffer_pad_out(struct bts_phys *phys, unsigned long head)
|
||||
{
|
||||
unsigned long index = head - phys->offset;
|
||||
|
||||
memset(page_address(phys->page) + index, 0, phys->size - index);
|
||||
}
|
||||
|
||||
static bool bts_buffer_is_full(struct bts_buffer *buf, struct bts_ctx *bts)
|
||||
{
|
||||
if (buf->snapshot)
|
||||
return false;
|
||||
|
||||
if (local_read(&buf->data_size) >= bts->handle.size ||
|
||||
bts->handle.size - local_read(&buf->data_size) < BTS_RECORD_SIZE)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static void bts_update(struct bts_ctx *bts)
|
||||
{
|
||||
int cpu = raw_smp_processor_id();
|
||||
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
|
||||
struct bts_buffer *buf = perf_get_aux(&bts->handle);
|
||||
unsigned long index = ds->bts_index - ds->bts_buffer_base, old, head;
|
||||
|
||||
if (!buf)
|
||||
return;
|
||||
|
||||
head = index + bts_buffer_offset(buf, buf->cur_buf);
|
||||
old = local_xchg(&buf->head, head);
|
||||
|
||||
if (!buf->snapshot) {
|
||||
if (old == head)
|
||||
return;
|
||||
|
||||
if (ds->bts_index >= ds->bts_absolute_maximum)
|
||||
local_inc(&buf->lost);
|
||||
|
||||
/*
|
||||
* old and head are always in the same physical buffer, so we
|
||||
* can subtract them to get the data size.
|
||||
*/
|
||||
local_add(head - old, &buf->data_size);
|
||||
} else {
|
||||
local_set(&buf->data_size, head);
|
||||
}
|
||||
}
|
||||
|
||||
static void __bts_event_start(struct perf_event *event)
|
||||
{
|
||||
struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
|
||||
struct bts_buffer *buf = perf_get_aux(&bts->handle);
|
||||
u64 config = 0;
|
||||
|
||||
if (!buf || bts_buffer_is_full(buf, bts))
|
||||
return;
|
||||
|
||||
event->hw.itrace_started = 1;
|
||||
event->hw.state = 0;
|
||||
|
||||
if (!buf->snapshot)
|
||||
config |= ARCH_PERFMON_EVENTSEL_INT;
|
||||
if (!event->attr.exclude_kernel)
|
||||
config |= ARCH_PERFMON_EVENTSEL_OS;
|
||||
if (!event->attr.exclude_user)
|
||||
config |= ARCH_PERFMON_EVENTSEL_USR;
|
||||
|
||||
bts_config_buffer(buf);
|
||||
|
||||
/*
|
||||
* local barrier to make sure that ds configuration made it
|
||||
* before we enable BTS
|
||||
*/
|
||||
wmb();
|
||||
|
||||
intel_pmu_enable_bts(config);
|
||||
}
|
||||
|
||||
static void bts_event_start(struct perf_event *event, int flags)
|
||||
{
|
||||
struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
|
||||
|
||||
__bts_event_start(event);
|
||||
|
||||
/* PMI handler: this counter is running and likely generating PMIs */
|
||||
ACCESS_ONCE(bts->started) = 1;
|
||||
}
|
||||
|
||||
static void __bts_event_stop(struct perf_event *event)
|
||||
{
|
||||
/*
|
||||
* No extra synchronization is mandated by the documentation to have
|
||||
* BTS data stores globally visible.
|
||||
*/
|
||||
intel_pmu_disable_bts();
|
||||
|
||||
if (event->hw.state & PERF_HES_STOPPED)
|
||||
return;
|
||||
|
||||
ACCESS_ONCE(event->hw.state) |= PERF_HES_STOPPED;
|
||||
}
|
||||
|
||||
static void bts_event_stop(struct perf_event *event, int flags)
|
||||
{
|
||||
struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
|
||||
|
||||
/* PMI handler: don't restart this counter */
|
||||
ACCESS_ONCE(bts->started) = 0;
|
||||
|
||||
__bts_event_stop(event);
|
||||
|
||||
if (flags & PERF_EF_UPDATE)
|
||||
bts_update(bts);
|
||||
}
|
||||
|
||||
void intel_bts_enable_local(void)
|
||||
{
|
||||
struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
|
||||
|
||||
if (bts->handle.event && bts->started)
|
||||
__bts_event_start(bts->handle.event);
|
||||
}
|
||||
|
||||
void intel_bts_disable_local(void)
|
||||
{
|
||||
struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
|
||||
|
||||
if (bts->handle.event)
|
||||
__bts_event_stop(bts->handle.event);
|
||||
}
|
||||
|
||||
static int
|
||||
bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
|
||||
{
|
||||
unsigned long head, space, next_space, pad, gap, skip, wakeup;
|
||||
unsigned int next_buf;
|
||||
struct bts_phys *phys, *next_phys;
|
||||
int ret;
|
||||
|
||||
if (buf->snapshot)
|
||||
return 0;
|
||||
|
||||
head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1);
|
||||
if (WARN_ON_ONCE(head != local_read(&buf->head)))
|
||||
return -EINVAL;
|
||||
|
||||
phys = &buf->buf[buf->cur_buf];
|
||||
space = phys->offset + phys->displacement + phys->size - head;
|
||||
pad = space;
|
||||
if (space > handle->size) {
|
||||
space = handle->size;
|
||||
space -= space % BTS_RECORD_SIZE;
|
||||
}
|
||||
if (space <= BTS_SAFETY_MARGIN) {
|
||||
/* See if next phys buffer has more space */
|
||||
next_buf = buf->cur_buf + 1;
|
||||
if (next_buf >= buf->nr_bufs)
|
||||
next_buf = 0;
|
||||
next_phys = &buf->buf[next_buf];
|
||||
gap = buf_size(phys->page) - phys->displacement - phys->size +
|
||||
next_phys->displacement;
|
||||
skip = pad + gap;
|
||||
if (handle->size >= skip) {
|
||||
next_space = next_phys->size;
|
||||
if (next_space + skip > handle->size) {
|
||||
next_space = handle->size - skip;
|
||||
next_space -= next_space % BTS_RECORD_SIZE;
|
||||
}
|
||||
if (next_space > space || !space) {
|
||||
if (pad)
|
||||
bts_buffer_pad_out(phys, head);
|
||||
ret = perf_aux_output_skip(handle, skip);
|
||||
if (ret)
|
||||
return ret;
|
||||
/* Advance to next phys buffer */
|
||||
phys = next_phys;
|
||||
space = next_space;
|
||||
head = phys->offset + phys->displacement;
|
||||
/*
|
||||
* After this, cur_buf and head won't match ds
|
||||
* anymore, so we must not be racing with
|
||||
* bts_update().
|
||||
*/
|
||||
buf->cur_buf = next_buf;
|
||||
local_set(&buf->head, head);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Don't go far beyond wakeup watermark */
|
||||
wakeup = BTS_SAFETY_MARGIN + BTS_RECORD_SIZE + handle->wakeup -
|
||||
handle->head;
|
||||
if (space > wakeup) {
|
||||
space = wakeup;
|
||||
space -= space % BTS_RECORD_SIZE;
|
||||
}
|
||||
|
||||
buf->end = head + space;
|
||||
|
||||
/*
|
||||
* If we have no space, the lost notification would have been sent when
|
||||
* we hit absolute_maximum - see bts_update()
|
||||
*/
|
||||
if (!space)
|
||||
return -ENOSPC;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int intel_bts_interrupt(void)
|
||||
{
|
||||
struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
|
||||
struct perf_event *event = bts->handle.event;
|
||||
struct bts_buffer *buf;
|
||||
s64 old_head;
|
||||
int err;
|
||||
|
||||
if (!event || !bts->started)
|
||||
return 0;
|
||||
|
||||
buf = perf_get_aux(&bts->handle);
|
||||
/*
|
||||
* Skip snapshot counters: they don't use the interrupt, but
|
||||
* there's no other way of telling, because the pointer will
|
||||
* keep moving
|
||||
*/
|
||||
if (!buf || buf->snapshot)
|
||||
return 0;
|
||||
|
||||
old_head = local_read(&buf->head);
|
||||
bts_update(bts);
|
||||
|
||||
/* no new data */
|
||||
if (old_head == local_read(&buf->head))
|
||||
return 0;
|
||||
|
||||
perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
|
||||
!!local_xchg(&buf->lost, 0));
|
||||
|
||||
buf = perf_aux_output_begin(&bts->handle, event);
|
||||
if (!buf)
|
||||
return 1;
|
||||
|
||||
err = bts_buffer_reset(buf, &bts->handle);
|
||||
if (err)
|
||||
perf_aux_output_end(&bts->handle, 0, false);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void bts_event_del(struct perf_event *event, int mode)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
|
||||
struct bts_buffer *buf = perf_get_aux(&bts->handle);
|
||||
|
||||
bts_event_stop(event, PERF_EF_UPDATE);
|
||||
|
||||
if (buf) {
|
||||
if (buf->snapshot)
|
||||
bts->handle.head =
|
||||
local_xchg(&buf->data_size,
|
||||
buf->nr_pages << PAGE_SHIFT);
|
||||
perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
|
||||
!!local_xchg(&buf->lost, 0));
|
||||
}
|
||||
|
||||
cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
|
||||
cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base;
|
||||
cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum;
|
||||
cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold;
|
||||
}
|
||||
|
||||
static int bts_event_add(struct perf_event *event, int mode)
|
||||
{
|
||||
struct bts_buffer *buf;
|
||||
struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
int ret = -EBUSY;
|
||||
|
||||
event->hw.state = PERF_HES_STOPPED;
|
||||
|
||||
if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
|
||||
return -EBUSY;
|
||||
|
||||
if (bts->handle.event)
|
||||
return -EBUSY;
|
||||
|
||||
buf = perf_aux_output_begin(&bts->handle, event);
|
||||
if (!buf)
|
||||
return -EINVAL;
|
||||
|
||||
ret = bts_buffer_reset(buf, &bts->handle);
|
||||
if (ret) {
|
||||
perf_aux_output_end(&bts->handle, 0, false);
|
||||
return ret;
|
||||
}
|
||||
|
||||
bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base;
|
||||
bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum;
|
||||
bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold;
|
||||
|
||||
if (mode & PERF_EF_START) {
|
||||
bts_event_start(event, 0);
|
||||
if (hwc->state & PERF_HES_STOPPED) {
|
||||
bts_event_del(event, 0);
|
||||
return -EBUSY;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void bts_event_destroy(struct perf_event *event)
|
||||
{
|
||||
x86_release_hardware();
|
||||
x86_del_exclusive(x86_lbr_exclusive_bts);
|
||||
}
|
||||
|
||||
static int bts_event_init(struct perf_event *event)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (event->attr.type != bts_pmu.type)
|
||||
return -ENOENT;
|
||||
|
||||
if (x86_add_exclusive(x86_lbr_exclusive_bts))
|
||||
return -EBUSY;
|
||||
|
||||
/*
|
||||
* BTS leaks kernel addresses even when CPL0 tracing is
|
||||
* disabled, so disallow intel_bts driver for unprivileged
|
||||
* users on paranoid systems since it provides trace data
|
||||
* to the user in a zero-copy fashion.
|
||||
*
|
||||
* Note that the default paranoia setting permits unprivileged
|
||||
* users to profile the kernel.
|
||||
*/
|
||||
if (event->attr.exclude_kernel && perf_paranoid_kernel() &&
|
||||
!capable(CAP_SYS_ADMIN))
|
||||
return -EACCES;
|
||||
|
||||
ret = x86_reserve_hardware();
|
||||
if (ret) {
|
||||
x86_del_exclusive(x86_lbr_exclusive_bts);
|
||||
return ret;
|
||||
}
|
||||
|
||||
event->destroy = bts_event_destroy;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void bts_event_read(struct perf_event *event)
|
||||
{
|
||||
}
|
||||
|
||||
static __init int bts_init(void)
|
||||
{
|
||||
if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
|
||||
return -ENODEV;
|
||||
|
||||
bts_pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE;
|
||||
bts_pmu.task_ctx_nr = perf_sw_context;
|
||||
bts_pmu.event_init = bts_event_init;
|
||||
bts_pmu.add = bts_event_add;
|
||||
bts_pmu.del = bts_event_del;
|
||||
bts_pmu.start = bts_event_start;
|
||||
bts_pmu.stop = bts_event_stop;
|
||||
bts_pmu.read = bts_event_read;
|
||||
bts_pmu.setup_aux = bts_buffer_setup_aux;
|
||||
bts_pmu.free_aux = bts_buffer_free_aux;
|
||||
|
||||
return perf_pmu_register(&bts_pmu, "intel_bts", -1);
|
||||
}
|
||||
arch_initcall(bts_init);
|
@@ -1,694 +0,0 @@
|
||||
/*
|
||||
* perf_event_intel_cstate.c: support cstate residency counters
|
||||
*
|
||||
* Copyright (C) 2015, Intel Corp.
|
||||
* Author: Kan Liang (kan.liang@intel.com)
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Library General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Library General Public License for more details.
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file exports cstate related free running (read-only) counters
* for perf. These counters may be used simultaneously by other tools,
* such as turbostat. However, it still makes sense to implement them
* in perf, because we can conveniently collect them together with
* other events, and they can be used from tools without special MSR
* access code.
|
||||
*
|
||||
* The events only support system-wide mode counting. There is no
|
||||
* sampling support because it is not supported by the hardware.
|
||||
*
|
||||
* According to counters' scope and category, two PMUs are registered
|
||||
* with the perf_event core subsystem.
|
||||
* - 'cstate_core': The counter is available for each physical core.
|
||||
* The counters include CORE_C*_RESIDENCY.
|
||||
* - 'cstate_pkg': The counter is available for each physical package.
|
||||
* The counters include PKG_C*_RESIDENCY.
|
||||
*
|
||||
* All of these counters are specified in the Intel® 64 and IA-32
|
||||
* Architectures Software Developer's Manual Vol3b.
|
||||
*
|
||||
* Model specific counters:
|
||||
* MSR_CORE_C1_RES: CORE C1 Residency Counter
|
||||
* perf code: 0x00
|
||||
* Available model: SLM,AMT
|
||||
* Scope: Core (each processor core has a MSR)
|
||||
* MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
|
||||
* perf code: 0x01
|
||||
* Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
|
||||
* Scope: Core
|
||||
* MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter
|
||||
* perf code: 0x02
|
||||
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL
|
||||
* Scope: Core
|
||||
* MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
|
||||
* perf code: 0x03
|
||||
* Available model: SNB,IVB,HSW,BDW,SKL
|
||||
* Scope: Core
|
||||
* MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter.
|
||||
* perf code: 0x00
|
||||
* Available model: SNB,IVB,HSW,BDW,SKL
|
||||
* Scope: Package (physical package)
|
||||
* MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter.
|
||||
* perf code: 0x01
|
||||
* Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
|
||||
* Scope: Package (physical package)
|
||||
* MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter.
|
||||
* perf code: 0x02
|
||||
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL
|
||||
* Scope: Package (physical package)
|
||||
* MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter.
|
||||
* perf code: 0x03
|
||||
* Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
|
||||
* Scope: Package (physical package)
|
||||
* MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter.
|
||||
* perf code: 0x04
|
||||
* Available model: HSW ULT only
|
||||
* Scope: Package (physical package)
|
||||
* MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter.
|
||||
* perf code: 0x05
|
||||
* Available model: HSW ULT only
|
||||
* Scope: Package (physical package)
|
||||
* MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
|
||||
* perf code: 0x06
|
||||
* Available model: HSW ULT only
|
||||
* Scope: Package (physical package)
|
||||
*
|
||||
*/
|
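As a usage illustration only: since these are ordinary counting events on a dynamic PMU, a tool can read one through the perf_event_open() syscall, taking the PMU type from sysfs and the config value from the event attributes further down (0x02 is c6-residency). This is a hedged sketch that assumes the cstate_core PMU is present and that perf_event_paranoid permits the open; error handling is minimal.

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	unsigned long long count;
	int type, fd;
	FILE *f = fopen("/sys/bus/event_source/devices/cstate_core/type", "r");

	if (!f || fscanf(f, "%d", &type) != 1)
		return 1;
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;		/* dynamic PMU type of cstate_core */
	attr.config = 0x02;		/* c6-residency */

	/* counting only, no sampling: pid = -1, a concrete CPU */
	fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
	if (fd < 0)
		return 1;

	sleep(1);
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("CPU0 core C6 residency accumulated over ~1s: %llu\n", count);
	close(fd);
	return 0;
}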
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <asm/cpu_device_id.h>
|
||||
#include "perf_event.h"
|
||||
|
||||
#define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format) \
|
||||
static ssize_t __cstate_##_var##_show(struct kobject *kobj, \
|
||||
struct kobj_attribute *attr, \
|
||||
char *page) \
|
||||
{ \
|
||||
BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
|
||||
return sprintf(page, _format "\n"); \
|
||||
} \
|
||||
static struct kobj_attribute format_attr_##_var = \
|
||||
__ATTR(_name, 0444, __cstate_##_var##_show, NULL)
|
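For readability, this is what the macro expands to for the core_event instance used further down, i.e. DEFINE_CSTATE_FORMAT_ATTR(core_event, event, "config:0-63"): a sysfs show routine that returns the fixed format string, plus the matching kobj_attribute (expansion shown as a sketch, kernel context assumed).

static ssize_t __cstate_core_event_show(struct kobject *kobj,
					struct kobj_attribute *attr,
					char *page)
{
	BUILD_BUG_ON(sizeof("config:0-63") >= PAGE_SIZE);
	return sprintf(page, "config:0-63" "\n");
}
static struct kobj_attribute format_attr_core_event =
	__ATTR(event, 0444, __cstate_core_event_show, NULL);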
||||
|
||||
static ssize_t cstate_get_attr_cpumask(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf);
|
||||
|
||||
struct perf_cstate_msr {
|
||||
u64 msr;
|
||||
struct perf_pmu_events_attr *attr;
|
||||
bool (*test)(int idx);
|
||||
};
|
||||
|
||||
|
||||
/* cstate_core PMU */
|
||||
|
||||
static struct pmu cstate_core_pmu;
|
||||
static bool has_cstate_core;
|
||||
|
||||
enum perf_cstate_core_id {
|
||||
/*
|
||||
* cstate_core events
|
||||
*/
|
||||
PERF_CSTATE_CORE_C1_RES = 0,
|
||||
PERF_CSTATE_CORE_C3_RES,
|
||||
PERF_CSTATE_CORE_C6_RES,
|
||||
PERF_CSTATE_CORE_C7_RES,
|
||||
|
||||
PERF_CSTATE_CORE_EVENT_MAX,
|
||||
};
|
||||
|
||||
bool test_core(int idx)
|
||||
{
|
||||
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
|
||||
boot_cpu_data.x86 != 6)
|
||||
return false;
|
||||
|
||||
switch (boot_cpu_data.x86_model) {
|
||||
case 30: /* 45nm Nehalem */
|
||||
case 26: /* 45nm Nehalem-EP */
|
||||
case 46: /* 45nm Nehalem-EX */
|
||||
|
||||
case 37: /* 32nm Westmere */
|
||||
case 44: /* 32nm Westmere-EP */
|
||||
case 47: /* 32nm Westmere-EX */
|
||||
if (idx == PERF_CSTATE_CORE_C3_RES ||
|
||||
idx == PERF_CSTATE_CORE_C6_RES)
|
||||
return true;
|
||||
break;
|
||||
case 42: /* 32nm SandyBridge */
|
||||
case 45: /* 32nm SandyBridge-E/EN/EP */
|
||||
|
||||
case 58: /* 22nm IvyBridge */
|
||||
case 62: /* 22nm IvyBridge-EP/EX */
|
||||
|
||||
case 60: /* 22nm Haswell Core */
|
||||
case 63: /* 22nm Haswell Server */
|
||||
case 69: /* 22nm Haswell ULT */
|
||||
case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
|
||||
|
||||
case 61: /* 14nm Broadwell Core-M */
|
||||
case 86: /* 14nm Broadwell Xeon D */
|
||||
case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
|
||||
case 79: /* 14nm Broadwell Server */
|
||||
|
||||
case 78: /* 14nm Skylake Mobile */
|
||||
case 94: /* 14nm Skylake Desktop */
|
||||
if (idx == PERF_CSTATE_CORE_C3_RES ||
|
||||
idx == PERF_CSTATE_CORE_C6_RES ||
|
||||
idx == PERF_CSTATE_CORE_C7_RES)
|
||||
return true;
|
||||
break;
|
||||
case 55: /* 22nm Atom "Silvermont" */
|
||||
case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
|
||||
case 76: /* 14nm Atom "Airmont" */
|
||||
if (idx == PERF_CSTATE_CORE_C1_RES ||
|
||||
idx == PERF_CSTATE_CORE_C6_RES)
|
||||
return true;
|
||||
break;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
PMU_EVENT_ATTR_STRING(c1-residency, evattr_cstate_core_c1, "event=0x00");
|
||||
PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_core_c3, "event=0x01");
|
||||
PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_core_c6, "event=0x02");
|
||||
PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_core_c7, "event=0x03");
|
||||
|
||||
static struct perf_cstate_msr core_msr[] = {
|
||||
[PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES, &evattr_cstate_core_c1, test_core, },
|
||||
[PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY, &evattr_cstate_core_c3, test_core, },
|
||||
[PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY, &evattr_cstate_core_c6, test_core, },
|
||||
[PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY, &evattr_cstate_core_c7, test_core, },
|
||||
};
|
||||
|
||||
static struct attribute *core_events_attrs[PERF_CSTATE_CORE_EVENT_MAX + 1] = {
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group core_events_attr_group = {
|
||||
.name = "events",
|
||||
.attrs = core_events_attrs,
|
||||
};
|
||||
|
||||
DEFINE_CSTATE_FORMAT_ATTR(core_event, event, "config:0-63");
|
||||
static struct attribute *core_format_attrs[] = {
|
||||
&format_attr_core_event.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group core_format_attr_group = {
|
||||
.name = "format",
|
||||
.attrs = core_format_attrs,
|
||||
};
|
||||
|
||||
static cpumask_t cstate_core_cpu_mask;
|
||||
static DEVICE_ATTR(cpumask, S_IRUGO, cstate_get_attr_cpumask, NULL);
|
||||
|
||||
static struct attribute *cstate_cpumask_attrs[] = {
|
||||
&dev_attr_cpumask.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group cpumask_attr_group = {
|
||||
.attrs = cstate_cpumask_attrs,
|
||||
};
|
||||
|
||||
static const struct attribute_group *core_attr_groups[] = {
|
||||
&core_events_attr_group,
|
||||
&core_format_attr_group,
|
||||
&cpumask_attr_group,
|
||||
NULL,
|
||||
};
|
||||
|
||||
/* cstate_core PMU end */
|
||||
|
||||
|
||||
/* cstate_pkg PMU */
|
||||
|
||||
static struct pmu cstate_pkg_pmu;
|
||||
static bool has_cstate_pkg;
|
||||
|
||||
enum perf_cstate_pkg_id {
|
||||
/*
|
||||
* cstate_pkg events
|
||||
*/
|
||||
PERF_CSTATE_PKG_C2_RES = 0,
|
||||
PERF_CSTATE_PKG_C3_RES,
|
||||
PERF_CSTATE_PKG_C6_RES,
|
||||
PERF_CSTATE_PKG_C7_RES,
|
||||
PERF_CSTATE_PKG_C8_RES,
|
||||
PERF_CSTATE_PKG_C9_RES,
|
||||
PERF_CSTATE_PKG_C10_RES,
|
||||
|
||||
PERF_CSTATE_PKG_EVENT_MAX,
|
||||
};
|
||||
|
||||
bool test_pkg(int idx)
|
||||
{
|
||||
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
|
||||
boot_cpu_data.x86 != 6)
|
||||
return false;
|
||||
|
||||
switch (boot_cpu_data.x86_model) {
|
||||
case 30: /* 45nm Nehalem */
|
||||
case 26: /* 45nm Nehalem-EP */
|
||||
case 46: /* 45nm Nehalem-EX */
|
||||
|
||||
case 37: /* 32nm Westmere */
|
||||
case 44: /* 32nm Westmere-EP */
|
||||
case 47: /* 32nm Westmere-EX */
|
||||
if (idx == PERF_CSTATE_PKG_C3_RES ||
idx == PERF_CSTATE_PKG_C6_RES ||
idx == PERF_CSTATE_PKG_C7_RES)
|
||||
return true;
|
||||
break;
|
||||
case 42: /* 32nm SandyBridge */
|
||||
case 45: /* 32nm SandyBridge-E/EN/EP */
|
||||
|
||||
case 58: /* 22nm IvyBridge */
|
||||
case 62: /* 22nm IvyBridge-EP/EX */
|
||||
|
||||
case 60: /* 22nm Haswell Core */
|
||||
case 63: /* 22nm Haswell Server */
|
||||
case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
|
||||
|
||||
case 61: /* 14nm Broadwell Core-M */
|
||||
case 86: /* 14nm Broadwell Xeon D */
|
||||
case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
|
||||
case 79: /* 14nm Broadwell Server */
|
||||
|
||||
case 78: /* 14nm Skylake Mobile */
|
||||
case 94: /* 14nm Skylake Desktop */
|
||||
if (idx == PERF_CSTATE_PKG_C2_RES ||
|
||||
idx == PERF_CSTATE_PKG_C3_RES ||
|
||||
idx == PERF_CSTATE_PKG_C6_RES ||
|
||||
idx == PERF_CSTATE_PKG_C7_RES)
|
||||
return true;
|
||||
break;
|
||||
case 55: /* 22nm Atom "Silvermont" */
|
||||
case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
|
||||
case 76: /* 14nm Atom "Airmont" */
|
||||
if (idx == PERF_CSTATE_PKG_C6_RES)
|
||||
return true;
|
||||
break;
|
||||
case 69: /* 22nm Haswell ULT */
|
||||
if (idx == PERF_CSTATE_PKG_C2_RES ||
|
||||
idx == PERF_CSTATE_PKG_C3_RES ||
|
||||
idx == PERF_CSTATE_PKG_C6_RES ||
|
||||
idx == PERF_CSTATE_PKG_C7_RES ||
|
||||
idx == PERF_CSTATE_PKG_C8_RES ||
|
||||
idx == PERF_CSTATE_PKG_C9_RES ||
|
||||
idx == PERF_CSTATE_PKG_C10_RES)
|
||||
return true;
|
||||
break;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
PMU_EVENT_ATTR_STRING(c2-residency, evattr_cstate_pkg_c2, "event=0x00");
|
||||
PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_pkg_c3, "event=0x01");
|
||||
PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_pkg_c6, "event=0x02");
|
||||
PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_pkg_c7, "event=0x03");
|
||||
PMU_EVENT_ATTR_STRING(c8-residency, evattr_cstate_pkg_c8, "event=0x04");
|
||||
PMU_EVENT_ATTR_STRING(c9-residency, evattr_cstate_pkg_c9, "event=0x05");
|
||||
PMU_EVENT_ATTR_STRING(c10-residency, evattr_cstate_pkg_c10, "event=0x06");
|
||||
|
||||
static struct perf_cstate_msr pkg_msr[] = {
|
||||
[PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY, &evattr_cstate_pkg_c2, test_pkg, },
|
||||
[PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY, &evattr_cstate_pkg_c3, test_pkg, },
|
||||
[PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY, &evattr_cstate_pkg_c6, test_pkg, },
|
||||
[PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY, &evattr_cstate_pkg_c7, test_pkg, },
|
||||
[PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY, &evattr_cstate_pkg_c8, test_pkg, },
|
||||
[PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY, &evattr_cstate_pkg_c9, test_pkg, },
|
||||
[PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &evattr_cstate_pkg_c10, test_pkg, },
|
||||
};
|
||||
|
||||
static struct attribute *pkg_events_attrs[PERF_CSTATE_PKG_EVENT_MAX + 1] = {
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group pkg_events_attr_group = {
|
||||
.name = "events",
|
||||
.attrs = pkg_events_attrs,
|
||||
};
|
||||
|
||||
DEFINE_CSTATE_FORMAT_ATTR(pkg_event, event, "config:0-63");
|
||||
static struct attribute *pkg_format_attrs[] = {
|
||||
&format_attr_pkg_event.attr,
|
||||
NULL,
|
||||
};
|
||||
static struct attribute_group pkg_format_attr_group = {
|
||||
.name = "format",
|
||||
.attrs = pkg_format_attrs,
|
||||
};
|
||||
|
||||
static cpumask_t cstate_pkg_cpu_mask;
|
||||
|
||||
static const struct attribute_group *pkg_attr_groups[] = {
|
||||
&pkg_events_attr_group,
|
||||
&pkg_format_attr_group,
|
||||
&cpumask_attr_group,
|
||||
NULL,
|
||||
};
|
||||
|
||||
/* cstate_pkg PMU end */
|
||||
|
||||
static ssize_t cstate_get_attr_cpumask(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct pmu *pmu = dev_get_drvdata(dev);
|
||||
|
||||
if (pmu == &cstate_core_pmu)
|
||||
return cpumap_print_to_pagebuf(true, buf, &cstate_core_cpu_mask);
|
||||
else if (pmu == &cstate_pkg_pmu)
|
||||
return cpumap_print_to_pagebuf(true, buf, &cstate_pkg_cpu_mask);
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int cstate_pmu_event_init(struct perf_event *event)
|
||||
{
|
||||
u64 cfg = event->attr.config;
|
||||
int ret = 0;
|
||||
|
||||
if (event->attr.type != event->pmu->type)
|
||||
return -ENOENT;
|
||||
|
||||
/* unsupported modes and filters */
|
||||
if (event->attr.exclude_user ||
|
||||
event->attr.exclude_kernel ||
|
||||
event->attr.exclude_hv ||
|
||||
event->attr.exclude_idle ||
|
||||
event->attr.exclude_host ||
|
||||
event->attr.exclude_guest ||
|
||||
event->attr.sample_period) /* no sampling */
|
||||
return -EINVAL;
|
||||
|
||||
if (event->pmu == &cstate_core_pmu) {
|
||||
if (cfg >= PERF_CSTATE_CORE_EVENT_MAX)
|
||||
return -EINVAL;
|
||||
if (!core_msr[cfg].attr)
|
||||
return -EINVAL;
|
||||
event->hw.event_base = core_msr[cfg].msr;
|
||||
} else if (event->pmu == &cstate_pkg_pmu) {
|
||||
if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
|
||||
return -EINVAL;
|
||||
if (!pkg_msr[cfg].attr)
|
||||
return -EINVAL;
|
||||
event->hw.event_base = pkg_msr[cfg].msr;
|
||||
} else
|
||||
return -ENOENT;
|
||||
|
||||
/* must be done before validate_group */
|
||||
event->hw.config = cfg;
|
||||
event->hw.idx = -1;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline u64 cstate_pmu_read_counter(struct perf_event *event)
|
||||
{
|
||||
u64 val;
|
||||
|
||||
rdmsrl(event->hw.event_base, val);
|
||||
return val;
|
||||
}
|
||||
|
||||
static void cstate_pmu_event_update(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
u64 prev_raw_count, new_raw_count;
|
||||
|
||||
again:
|
||||
prev_raw_count = local64_read(&hwc->prev_count);
|
||||
new_raw_count = cstate_pmu_read_counter(event);
|
||||
|
||||
if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
|
||||
new_raw_count) != prev_raw_count)
|
||||
goto again;
|
||||
|
||||
local64_add(new_raw_count - prev_raw_count, &event->count);
|
||||
}
|
||||
|
||||
static void cstate_pmu_event_start(struct perf_event *event, int mode)
|
||||
{
|
||||
local64_set(&event->hw.prev_count, cstate_pmu_read_counter(event));
|
||||
}
|
||||
|
||||
static void cstate_pmu_event_stop(struct perf_event *event, int mode)
|
||||
{
|
||||
cstate_pmu_event_update(event);
|
||||
}
|
||||
|
||||
static void cstate_pmu_event_del(struct perf_event *event, int mode)
|
||||
{
|
||||
cstate_pmu_event_stop(event, PERF_EF_UPDATE);
|
||||
}
|
||||
|
||||
static int cstate_pmu_event_add(struct perf_event *event, int mode)
|
||||
{
|
||||
if (mode & PERF_EF_START)
|
||||
cstate_pmu_event_start(event, mode);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void cstate_cpu_exit(int cpu)
|
||||
{
|
||||
int i, id, target;
|
||||
|
||||
/* cpu exit for cstate core */
|
||||
if (has_cstate_core) {
|
||||
id = topology_core_id(cpu);
|
||||
target = -1;
|
||||
|
||||
for_each_online_cpu(i) {
|
||||
if (i == cpu)
|
||||
continue;
|
||||
if (id == topology_core_id(i)) {
|
||||
target = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask) && target >= 0)
|
||||
cpumask_set_cpu(target, &cstate_core_cpu_mask);
|
||||
WARN_ON(cpumask_empty(&cstate_core_cpu_mask));
|
||||
if (target >= 0)
|
||||
perf_pmu_migrate_context(&cstate_core_pmu, cpu, target);
|
||||
}
|
||||
|
||||
/* cpu exit for cstate pkg */
|
||||
if (has_cstate_pkg) {
|
||||
id = topology_physical_package_id(cpu);
|
||||
target = -1;
|
||||
|
||||
for_each_online_cpu(i) {
|
||||
if (i == cpu)
|
||||
continue;
|
||||
if (id == topology_physical_package_id(i)) {
|
||||
target = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask) && target >= 0)
|
||||
cpumask_set_cpu(target, &cstate_pkg_cpu_mask);
|
||||
WARN_ON(cpumask_empty(&cstate_pkg_cpu_mask));
|
||||
if (target >= 0)
|
||||
perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
|
||||
}
|
||||
}
|
||||
|
||||
static void cstate_cpu_init(int cpu)
|
||||
{
|
||||
int i, id;
|
||||
|
||||
/* cpu init for cstate core */
|
||||
if (has_cstate_core) {
|
||||
id = topology_core_id(cpu);
|
||||
for_each_cpu(i, &cstate_core_cpu_mask) {
|
||||
if (id == topology_core_id(i))
|
||||
break;
|
||||
}
|
||||
if (i >= nr_cpu_ids)
|
||||
cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
|
||||
}
|
||||
|
||||
/* cpu init for cstate pkg */
|
||||
if (has_cstate_pkg) {
|
||||
id = topology_physical_package_id(cpu);
|
||||
for_each_cpu(i, &cstate_pkg_cpu_mask) {
|
||||
if (id == topology_physical_package_id(i))
|
||||
break;
|
||||
}
|
||||
if (i >= nr_cpu_ids)
|
||||
cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
|
||||
}
|
||||
}
|
||||
|
||||
static int cstate_cpu_notifier(struct notifier_block *self,
|
||||
unsigned long action, void *hcpu)
|
||||
{
|
||||
unsigned int cpu = (long)hcpu;
|
||||
|
||||
switch (action & ~CPU_TASKS_FROZEN) {
|
||||
case CPU_UP_PREPARE:
|
||||
break;
|
||||
case CPU_STARTING:
|
||||
cstate_cpu_init(cpu);
|
||||
break;
|
||||
case CPU_UP_CANCELED:
|
||||
case CPU_DYING:
|
||||
break;
|
||||
case CPU_ONLINE:
|
||||
case CPU_DEAD:
|
||||
break;
|
||||
case CPU_DOWN_PREPARE:
|
||||
cstate_cpu_exit(cpu);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
* Probe the cstate events and insert the available ones into the sysfs attrs.
* Return false if there are no available events.
|
||||
*/
|
||||
static bool cstate_probe_msr(struct perf_cstate_msr *msr,
|
||||
struct attribute **events_attrs,
|
||||
int max_event_nr)
|
||||
{
|
||||
int i, j = 0;
|
||||
u64 val;
|
||||
|
||||
/* Probe the cstate events. */
|
||||
for (i = 0; i < max_event_nr; i++) {
|
||||
if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val))
|
||||
msr[i].attr = NULL;
|
||||
}
|
||||
|
||||
/* List remaining events in the sysfs attrs. */
|
||||
for (i = 0; i < max_event_nr; i++) {
|
||||
if (msr[i].attr)
|
||||
events_attrs[j++] = &msr[i].attr->attr.attr;
|
||||
}
|
||||
events_attrs[j] = NULL;
|
||||
|
||||
return (j > 0) ? true : false;
|
||||
}
|
||||
|
||||
static int __init cstate_init(void)
|
||||
{
|
||||
/* SLM has different MSR for PKG C6 */
|
||||
switch (boot_cpu_data.x86_model) {
|
||||
case 55:
|
||||
case 76:
|
||||
case 77:
|
||||
pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY;
|
||||
}
|
||||
|
||||
if (cstate_probe_msr(core_msr, core_events_attrs, PERF_CSTATE_CORE_EVENT_MAX))
|
||||
has_cstate_core = true;
|
||||
|
||||
if (cstate_probe_msr(pkg_msr, pkg_events_attrs, PERF_CSTATE_PKG_EVENT_MAX))
|
||||
has_cstate_pkg = true;
|
||||
|
||||
return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV;
|
||||
}
|
||||
|
||||
static void __init cstate_cpumask_init(void)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
cpu_notifier_register_begin();
|
||||
|
||||
for_each_online_cpu(cpu)
|
||||
cstate_cpu_init(cpu);
|
||||
|
||||
__perf_cpu_notifier(cstate_cpu_notifier);
|
||||
|
||||
cpu_notifier_register_done();
|
||||
}
|
||||
|
||||
static struct pmu cstate_core_pmu = {
|
||||
.attr_groups = core_attr_groups,
|
||||
.name = "cstate_core",
|
||||
.task_ctx_nr = perf_invalid_context,
|
||||
.event_init = cstate_pmu_event_init,
|
||||
.add = cstate_pmu_event_add, /* must have */
|
||||
.del = cstate_pmu_event_del, /* must have */
|
||||
.start = cstate_pmu_event_start,
|
||||
.stop = cstate_pmu_event_stop,
|
||||
.read = cstate_pmu_event_update,
|
||||
.capabilities = PERF_PMU_CAP_NO_INTERRUPT,
|
||||
};
|
||||
|
||||
static struct pmu cstate_pkg_pmu = {
|
||||
.attr_groups = pkg_attr_groups,
|
||||
.name = "cstate_pkg",
|
||||
.task_ctx_nr = perf_invalid_context,
|
||||
.event_init = cstate_pmu_event_init,
|
||||
.add = cstate_pmu_event_add, /* must have */
|
||||
.del = cstate_pmu_event_del, /* must have */
|
||||
.start = cstate_pmu_event_start,
|
||||
.stop = cstate_pmu_event_stop,
|
||||
.read = cstate_pmu_event_update,
|
||||
.capabilities = PERF_PMU_CAP_NO_INTERRUPT,
|
||||
};
|
||||
|
||||
static void __init cstate_pmus_register(void)
|
||||
{
|
||||
int err;
|
||||
|
||||
if (has_cstate_core) {
|
||||
err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1);
|
||||
if (WARN_ON(err))
|
||||
pr_info("Failed to register PMU %s error %d\n",
|
||||
cstate_core_pmu.name, err);
|
||||
}
|
||||
|
||||
if (has_cstate_pkg) {
|
||||
err = perf_pmu_register(&cstate_pkg_pmu, cstate_pkg_pmu.name, -1);
|
||||
if (WARN_ON(err))
|
||||
pr_info("Failed to register PMU %s error %d\n",
|
||||
cstate_pkg_pmu.name, err);
|
||||
}
|
||||
}
|
||||
|
||||
static int __init cstate_pmu_init(void)
|
||||
{
|
||||
int err;
|
||||
|
||||
if (cpu_has_hypervisor)
|
||||
return -ENODEV;
|
||||
|
||||
err = cstate_init();
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
cstate_cpumask_init();
|
||||
|
||||
cstate_pmus_register();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
device_initcall(cstate_pmu_init);
|
@@ -1,783 +0,0 @@
|
||||
/*
|
||||
* perf_event_intel_rapl.c: support Intel RAPL energy consumption counters
|
||||
* Copyright (C) 2013 Google, Inc., Stephane Eranian
|
||||
*
|
||||
* Intel RAPL interface is specified in the IA-32 Manual Vol3b
|
||||
* section 14.7.1 (September 2013)
|
||||
*
|
||||
* RAPL provides more controls than just reporting energy consumption
|
||||
* however here we only expose the energy consumption free running
* counters (pp0, pkg, dram, pp1-gpu).
|
||||
*
|
||||
* Each of those counters increments in a power unit defined by the
|
||||
* RAPL_POWER_UNIT MSR. On SandyBridge, this unit is 1/(2^16) Joules
|
||||
* but it can vary.
|
||||
*
|
||||
* Counter to rapl events mappings:
|
||||
*
|
||||
* pp0 counter: consumption of all physical cores (power plane 0)
|
||||
* event: rapl_energy_cores
|
||||
* perf code: 0x1
|
||||
*
|
||||
* pkg counter: consumption of the whole processor package
|
||||
* event: rapl_energy_pkg
|
||||
* perf code: 0x2
|
||||
*
|
||||
* dram counter: consumption of the dram domain (servers only)
|
||||
* event: rapl_energy_dram
|
||||
* perf code: 0x3
|
||||
*
|
||||
* gpu counter: consumption of the builtin-gpu domain (client only)
|
||||
* event: rapl_energy_gpu
|
||||
* perf code: 0x4
|
||||
*
|
||||
* We manage those counters as free running (read-only). They may be
|
||||
* used simultaneously by other tools, such as turbostat.
|
||||
*
|
||||
* The events only support system-wide mode counting. There is no
|
||||
* sampling support because it does not make sense and is not
|
||||
* supported by the RAPL hardware.
|
||||
*
|
||||
* Because we want to avoid floating-point operations in the kernel,
|
||||
* the events are all reported in fixed point arithmetic (32.32).
|
||||
* Tools must adjust the counts to convert them to Watts using
|
||||
* the duration of the measurement. Tools may use a function such as
|
||||
* ldexp(raw_count, -32);
|
||||
*/
|
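As the comment notes, counts are exported as 32.32 fixed point (2^-32 Joule units) so the kernel never touches floating point; the consumer converts back with ldexp(). A tiny illustrative conversion with a made-up raw count and interval (compile with -lm):

#include <math.h>
#include <stdio.h>

int main(void)
{
	/* hypothetical values read from a RAPL perf event over one second */
	unsigned long long raw_count = 43ULL << 32;	/* i.e. 43 J in 2^-32 J units */
	double interval_s = 1.0;

	double joules = ldexp((double)raw_count, -32);
	printf("%.3f J over %.1f s = %.3f W\n",
	       joules, interval_s, joules / interval_s);
	return 0;
}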
||||
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <asm/cpu_device_id.h>
|
||||
#include "perf_event.h"
|
||||
|
||||
/*
|
||||
* RAPL energy status counters
|
||||
*/
|
||||
#define RAPL_IDX_PP0_NRG_STAT 0 /* all cores */
|
||||
#define INTEL_RAPL_PP0 0x1 /* pseudo-encoding */
|
||||
#define RAPL_IDX_PKG_NRG_STAT 1 /* entire package */
|
||||
#define INTEL_RAPL_PKG 0x2 /* pseudo-encoding */
|
||||
#define RAPL_IDX_RAM_NRG_STAT 2 /* DRAM */
|
||||
#define INTEL_RAPL_RAM 0x3 /* pseudo-encoding */
|
||||
#define RAPL_IDX_PP1_NRG_STAT 3 /* gpu */
|
||||
#define INTEL_RAPL_PP1 0x4 /* pseudo-encoding */
|
||||
|
||||
#define NR_RAPL_DOMAINS 0x4
|
||||
static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
|
||||
"pp0-core",
|
||||
"package",
|
||||
"dram",
|
||||
"pp1-gpu",
|
||||
};
|
||||
|
||||
/* Clients have PP0, PKG */
|
||||
#define RAPL_IDX_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\
|
||||
1<<RAPL_IDX_PKG_NRG_STAT|\
|
||||
1<<RAPL_IDX_PP1_NRG_STAT)
|
||||
|
||||
/* Servers have PP0, PKG, RAM */
|
||||
#define RAPL_IDX_SRV (1<<RAPL_IDX_PP0_NRG_STAT|\
|
||||
1<<RAPL_IDX_PKG_NRG_STAT|\
|
||||
1<<RAPL_IDX_RAM_NRG_STAT)
|
||||
|
||||
/* Haswell and later clients have PP0, PKG, RAM, PP1 */
|
||||
#define RAPL_IDX_HSW (1<<RAPL_IDX_PP0_NRG_STAT|\
|
||||
1<<RAPL_IDX_PKG_NRG_STAT|\
|
||||
1<<RAPL_IDX_RAM_NRG_STAT|\
|
||||
1<<RAPL_IDX_PP1_NRG_STAT)
|
||||
|
||||
/* Knights Landing has PKG, RAM */
|
||||
#define RAPL_IDX_KNL (1<<RAPL_IDX_PKG_NRG_STAT|\
|
||||
1<<RAPL_IDX_RAM_NRG_STAT)
|
||||
|
||||
/*
|
||||
* event code: LSB 8 bits, passed in attr->config
|
||||
* any other bit is reserved
|
||||
*/
|
||||
#define RAPL_EVENT_MASK 0xFFULL
|
||||
|
||||
#define DEFINE_RAPL_FORMAT_ATTR(_var, _name, _format) \
|
||||
static ssize_t __rapl_##_var##_show(struct kobject *kobj, \
|
||||
struct kobj_attribute *attr, \
|
||||
char *page) \
|
||||
{ \
|
||||
BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
|
||||
return sprintf(page, _format "\n"); \
|
||||
} \
|
||||
static struct kobj_attribute format_attr_##_var = \
|
||||
__ATTR(_name, 0444, __rapl_##_var##_show, NULL)
|
||||
|
||||
#define RAPL_CNTR_WIDTH 32 /* 32-bit rapl counters */
|
||||
|
||||
#define RAPL_EVENT_ATTR_STR(_name, v, str) \
|
||||
static struct perf_pmu_events_attr event_attr_##v = { \
|
||||
.attr = __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \
|
||||
.id = 0, \
|
||||
.event_str = str, \
|
||||
};
|
||||
|
||||
struct rapl_pmu {
|
||||
spinlock_t lock;
|
||||
int n_active; /* number of active events */
|
||||
struct list_head active_list;
|
||||
struct pmu *pmu; /* pointer to rapl_pmu_class */
|
||||
ktime_t timer_interval; /* in ktime_t unit */
|
||||
struct hrtimer hrtimer;
|
||||
};
|
||||
|
||||
static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly; /* 1/2^hw_unit Joule */
|
||||
static struct pmu rapl_pmu_class;
|
||||
static cpumask_t rapl_cpu_mask;
|
||||
static int rapl_cntr_mask;
|
||||
|
||||
static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu);
|
||||
static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free);
|
||||
|
||||
static struct x86_pmu_quirk *rapl_quirks;
|
||||
static inline u64 rapl_read_counter(struct perf_event *event)
|
||||
{
|
||||
u64 raw;
|
||||
rdmsrl(event->hw.event_base, raw);
|
||||
return raw;
|
||||
}
|
||||
|
||||
#define rapl_add_quirk(func_) \
|
||||
do { \
|
||||
static struct x86_pmu_quirk __quirk __initdata = { \
|
||||
.func = func_, \
|
||||
}; \
|
||||
__quirk.next = rapl_quirks; \
|
||||
rapl_quirks = &__quirk; \
|
||||
} while (0)
|
||||
|
||||
static inline u64 rapl_scale(u64 v, int cfg)
|
||||
{
|
||||
if (cfg > NR_RAPL_DOMAINS) {
|
||||
pr_warn("invalid domain %d, failed to scale data\n", cfg);
|
||||
return v;
|
||||
}
|
||||
/*
|
||||
* scale delta to smallest unit (1/2^32)
|
||||
* users must then scale back: count * 1/(1e9*2^32) to get Joules
|
||||
* or use ldexp(count, -32).
|
||||
* Watts = Joules/Time delta
|
||||
*/
|
||||
return v << (32 - rapl_hw_unit[cfg - 1]);
|
||||
}
|
||||
|
||||
static u64 rapl_event_update(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
u64 prev_raw_count, new_raw_count;
|
||||
s64 delta, sdelta;
|
||||
int shift = RAPL_CNTR_WIDTH;
|
||||
|
||||
again:
|
||||
prev_raw_count = local64_read(&hwc->prev_count);
|
||||
rdmsrl(event->hw.event_base, new_raw_count);
|
||||
|
||||
if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
|
||||
new_raw_count) != prev_raw_count) {
|
||||
cpu_relax();
|
||||
goto again;
|
||||
}
|
||||
|
||||
/*
|
||||
* Now we have the new raw value and have updated the prev
|
||||
* timestamp already. We can now calculate the elapsed delta
|
||||
* (event-)time and add that to the generic event.
|
||||
*
|
||||
* Careful, not all hw sign-extends above the physical width
|
||||
* of the count.
|
||||
*/
|
||||
delta = (new_raw_count << shift) - (prev_raw_count << shift);
|
||||
delta >>= shift;
|
||||
|
||||
sdelta = rapl_scale(delta, event->hw.config);
|
||||
|
||||
local64_add(sdelta, &event->count);
|
||||
|
||||
return new_raw_count;
|
||||
}
|
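The shift in rapl_event_update() is how a counter narrower than 64 bits is handled: both samples are shifted up so the counter's top bit lands in bit 63, the signed subtraction then wraps the same way the hardware does, and shifting back down recovers the true delta. A standalone illustration for the 32-bit case (shift = RAPL_CNTR_WIDTH = 32), relying on the usual two's-complement behaviour just as the kernel does:

#include <stdio.h>

int main(void)
{
	int shift = 32;					/* RAPL_CNTR_WIDTH */
	unsigned long long prev_raw = 0xFFFFFFF0ULL;	/* just before the wrap */
	unsigned long long new_raw  = 0x00000010ULL;	/* just after the wrap */

	long long delta = (long long)(new_raw << shift) -
			  (long long)(prev_raw << shift);
	delta >>= shift;

	printf("delta = %lld\n", delta);	/* 32, not a huge bogus value */
	return 0;
}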
||||
|
||||
static void rapl_start_hrtimer(struct rapl_pmu *pmu)
|
||||
{
|
||||
hrtimer_start(&pmu->hrtimer, pmu->timer_interval,
|
||||
HRTIMER_MODE_REL_PINNED);
|
||||
}
|
||||
|
||||
static void rapl_stop_hrtimer(struct rapl_pmu *pmu)
|
||||
{
|
||||
hrtimer_cancel(&pmu->hrtimer);
|
||||
}
|
||||
|
||||
static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer)
|
||||
{
|
||||
struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
|
||||
struct perf_event *event;
|
||||
unsigned long flags;
|
||||
|
||||
if (!pmu->n_active)
|
||||
return HRTIMER_NORESTART;
|
||||
|
||||
spin_lock_irqsave(&pmu->lock, flags);
|
||||
|
||||
list_for_each_entry(event, &pmu->active_list, active_entry) {
|
||||
rapl_event_update(event);
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&pmu->lock, flags);
|
||||
|
||||
hrtimer_forward_now(hrtimer, pmu->timer_interval);
|
||||
|
||||
return HRTIMER_RESTART;
|
||||
}
|
||||
|
||||
static void rapl_hrtimer_init(struct rapl_pmu *pmu)
|
||||
{
|
||||
struct hrtimer *hr = &pmu->hrtimer;
|
||||
|
||||
hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
|
||||
hr->function = rapl_hrtimer_handle;
|
||||
}
|
||||
|
||||
static void __rapl_pmu_event_start(struct rapl_pmu *pmu,
|
||||
struct perf_event *event)
|
||||
{
|
||||
if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
|
||||
return;
|
||||
|
||||
event->hw.state = 0;
|
||||
|
||||
list_add_tail(&event->active_entry, &pmu->active_list);
|
||||
|
||||
local64_set(&event->hw.prev_count, rapl_read_counter(event));
|
||||
|
||||
pmu->n_active++;
|
||||
if (pmu->n_active == 1)
|
||||
rapl_start_hrtimer(pmu);
|
||||
}
|
||||
|
||||
static void rapl_pmu_event_start(struct perf_event *event, int mode)
|
||||
{
|
||||
struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&pmu->lock, flags);
|
||||
__rapl_pmu_event_start(pmu, event);
|
||||
spin_unlock_irqrestore(&pmu->lock, flags);
|
||||
}
|
||||
|
||||
static void rapl_pmu_event_stop(struct perf_event *event, int mode)
|
||||
{
|
||||
struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&pmu->lock, flags);
|
||||
|
||||
/* mark event as deactivated and stopped */
|
||||
if (!(hwc->state & PERF_HES_STOPPED)) {
|
||||
WARN_ON_ONCE(pmu->n_active <= 0);
|
||||
pmu->n_active--;
|
||||
if (pmu->n_active == 0)
|
||||
rapl_stop_hrtimer(pmu);
|
||||
|
||||
list_del(&event->active_entry);
|
||||
|
||||
WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
|
||||
hwc->state |= PERF_HES_STOPPED;
|
||||
}
|
||||
|
||||
/* check if update of sw counter is necessary */
|
||||
if ((mode & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
|
||||
/*
|
||||
* Drain the remaining delta count out of an event
|
||||
* that we are disabling:
|
||||
*/
|
||||
rapl_event_update(event);
|
||||
hwc->state |= PERF_HES_UPTODATE;
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&pmu->lock, flags);
|
||||
}
|
||||
|
||||
static int rapl_pmu_event_add(struct perf_event *event, int mode)
|
||||
{
|
||||
struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&pmu->lock, flags);
|
||||
|
||||
hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
|
||||
|
||||
if (mode & PERF_EF_START)
|
||||
__rapl_pmu_event_start(pmu, event);
|
||||
|
||||
spin_unlock_irqrestore(&pmu->lock, flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void rapl_pmu_event_del(struct perf_event *event, int flags)
|
||||
{
|
||||
rapl_pmu_event_stop(event, PERF_EF_UPDATE);
|
||||
}
|
||||
|
||||
static int rapl_pmu_event_init(struct perf_event *event)
|
||||
{
|
||||
u64 cfg = event->attr.config & RAPL_EVENT_MASK;
|
||||
int bit, msr, ret = 0;
|
||||
|
||||
/* only look at RAPL events */
|
||||
if (event->attr.type != rapl_pmu_class.type)
|
||||
return -ENOENT;
|
||||
|
||||
/* check only supported bits are set */
|
||||
if (event->attr.config & ~RAPL_EVENT_MASK)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* check event is known (determines counter)
|
||||
*/
|
||||
switch (cfg) {
|
||||
case INTEL_RAPL_PP0:
|
||||
bit = RAPL_IDX_PP0_NRG_STAT;
|
||||
msr = MSR_PP0_ENERGY_STATUS;
|
||||
break;
|
||||
case INTEL_RAPL_PKG:
|
||||
bit = RAPL_IDX_PKG_NRG_STAT;
|
||||
msr = MSR_PKG_ENERGY_STATUS;
|
||||
break;
|
||||
case INTEL_RAPL_RAM:
|
||||
bit = RAPL_IDX_RAM_NRG_STAT;
|
||||
msr = MSR_DRAM_ENERGY_STATUS;
|
||||
break;
|
||||
case INTEL_RAPL_PP1:
|
||||
bit = RAPL_IDX_PP1_NRG_STAT;
|
||||
msr = MSR_PP1_ENERGY_STATUS;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
/* check event supported */
|
||||
if (!(rapl_cntr_mask & (1 << bit)))
|
||||
return -EINVAL;
|
||||
|
||||
/* unsupported modes and filters */
|
||||
if (event->attr.exclude_user ||
|
||||
event->attr.exclude_kernel ||
|
||||
event->attr.exclude_hv ||
|
||||
event->attr.exclude_idle ||
|
||||
event->attr.exclude_host ||
|
||||
event->attr.exclude_guest ||
|
||||
event->attr.sample_period) /* no sampling */
|
||||
return -EINVAL;
|
||||
|
||||
/* must be done before validate_group */
|
||||
event->hw.event_base = msr;
|
||||
event->hw.config = cfg;
|
||||
event->hw.idx = bit;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void rapl_pmu_event_read(struct perf_event *event)
|
||||
{
|
||||
rapl_event_update(event);
|
||||
}
|
||||
|
||||
static ssize_t rapl_get_attr_cpumask(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
return cpumap_print_to_pagebuf(true, buf, &rapl_cpu_mask);
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL);
|
||||
|
||||
static struct attribute *rapl_pmu_attrs[] = {
|
||||
&dev_attr_cpumask.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group rapl_pmu_attr_group = {
|
||||
.attrs = rapl_pmu_attrs,
|
||||
};
|
||||
|
||||
RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
|
||||
RAPL_EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02");
|
||||
RAPL_EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03");
|
||||
RAPL_EVENT_ATTR_STR(energy-gpu , rapl_gpu, "event=0x04");
|
||||
|
||||
RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules");
|
||||
RAPL_EVENT_ATTR_STR(energy-pkg.unit , rapl_pkg_unit, "Joules");
|
||||
RAPL_EVENT_ATTR_STR(energy-ram.unit , rapl_ram_unit, "Joules");
|
||||
RAPL_EVENT_ATTR_STR(energy-gpu.unit , rapl_gpu_unit, "Joules");
|
||||
|
||||
/*
|
||||
* we compute in 0.23 nJ increments regardless of MSR
|
||||
*/
|
||||
RAPL_EVENT_ATTR_STR(energy-cores.scale, rapl_cores_scale, "2.3283064365386962890625e-10");
|
||||
RAPL_EVENT_ATTR_STR(energy-pkg.scale, rapl_pkg_scale, "2.3283064365386962890625e-10");
|
||||
RAPL_EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890625e-10");
|
||||
RAPL_EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, "2.3283064365386962890625e-10");
|
||||
|
||||
static struct attribute *rapl_events_srv_attr[] = {
|
||||
EVENT_PTR(rapl_cores),
|
||||
EVENT_PTR(rapl_pkg),
|
||||
EVENT_PTR(rapl_ram),
|
||||
|
||||
EVENT_PTR(rapl_cores_unit),
|
||||
EVENT_PTR(rapl_pkg_unit),
|
||||
EVENT_PTR(rapl_ram_unit),
|
||||
|
||||
EVENT_PTR(rapl_cores_scale),
|
||||
EVENT_PTR(rapl_pkg_scale),
|
||||
EVENT_PTR(rapl_ram_scale),
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute *rapl_events_cln_attr[] = {
|
||||
EVENT_PTR(rapl_cores),
|
||||
EVENT_PTR(rapl_pkg),
|
||||
EVENT_PTR(rapl_gpu),
|
||||
|
||||
EVENT_PTR(rapl_cores_unit),
|
||||
EVENT_PTR(rapl_pkg_unit),
|
||||
EVENT_PTR(rapl_gpu_unit),
|
||||
|
||||
EVENT_PTR(rapl_cores_scale),
|
||||
EVENT_PTR(rapl_pkg_scale),
|
||||
EVENT_PTR(rapl_gpu_scale),
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute *rapl_events_hsw_attr[] = {
|
||||
EVENT_PTR(rapl_cores),
|
||||
EVENT_PTR(rapl_pkg),
|
||||
EVENT_PTR(rapl_gpu),
|
||||
EVENT_PTR(rapl_ram),
|
||||
|
||||
EVENT_PTR(rapl_cores_unit),
|
||||
EVENT_PTR(rapl_pkg_unit),
|
||||
EVENT_PTR(rapl_gpu_unit),
|
||||
EVENT_PTR(rapl_ram_unit),
|
||||
|
||||
EVENT_PTR(rapl_cores_scale),
|
||||
EVENT_PTR(rapl_pkg_scale),
|
||||
EVENT_PTR(rapl_gpu_scale),
|
||||
EVENT_PTR(rapl_ram_scale),
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute *rapl_events_knl_attr[] = {
|
||||
EVENT_PTR(rapl_pkg),
|
||||
EVENT_PTR(rapl_ram),
|
||||
|
||||
EVENT_PTR(rapl_pkg_unit),
|
||||
EVENT_PTR(rapl_ram_unit),
|
||||
|
||||
EVENT_PTR(rapl_pkg_scale),
|
||||
EVENT_PTR(rapl_ram_scale),
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group rapl_pmu_events_group = {
|
||||
.name = "events",
|
||||
.attrs = NULL, /* patched at runtime */
|
||||
};
|
||||
|
||||
DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7");
|
||||
static struct attribute *rapl_formats_attr[] = {
|
||||
&format_attr_event.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group rapl_pmu_format_group = {
|
||||
.name = "format",
|
||||
.attrs = rapl_formats_attr,
|
||||
};
|
||||
|
||||
const struct attribute_group *rapl_attr_groups[] = {
|
||||
&rapl_pmu_attr_group,
|
||||
&rapl_pmu_format_group,
|
||||
&rapl_pmu_events_group,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct pmu rapl_pmu_class = {
|
||||
.attr_groups = rapl_attr_groups,
|
||||
.task_ctx_nr = perf_invalid_context, /* system-wide only */
|
||||
.event_init = rapl_pmu_event_init,
|
||||
.add = rapl_pmu_event_add, /* must have */
|
||||
.del = rapl_pmu_event_del, /* must have */
|
||||
.start = rapl_pmu_event_start,
|
||||
.stop = rapl_pmu_event_stop,
|
||||
.read = rapl_pmu_event_read,
|
||||
};
|
||||
|
||||
static void rapl_cpu_exit(int cpu)
|
||||
{
|
||||
struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
|
||||
int i, phys_id = topology_physical_package_id(cpu);
|
||||
int target = -1;
|
||||
|
||||
/* find a new cpu on same package */
|
||||
for_each_online_cpu(i) {
|
||||
if (i == cpu)
|
||||
continue;
|
||||
if (phys_id == topology_physical_package_id(i)) {
|
||||
target = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* clear cpu from cpumask
|
||||
* if was set in cpumask and still some cpu on package,
|
||||
* then move to new cpu
|
||||
*/
|
||||
if (cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask) && target >= 0)
|
||||
cpumask_set_cpu(target, &rapl_cpu_mask);
|
||||
|
||||
WARN_ON(cpumask_empty(&rapl_cpu_mask));
|
||||
/*
|
||||
* migrate events and context to new cpu
|
||||
*/
|
||||
if (target >= 0)
|
||||
perf_pmu_migrate_context(pmu->pmu, cpu, target);
|
||||
|
||||
/* cancel overflow polling timer for CPU */
|
||||
rapl_stop_hrtimer(pmu);
|
||||
}
|
||||
|
||||
static void rapl_cpu_init(int cpu)
|
||||
{
|
||||
int i, phys_id = topology_physical_package_id(cpu);
|
||||
|
||||
/* check if phys_id is already covered */
|
||||
for_each_cpu(i, &rapl_cpu_mask) {
|
||||
if (phys_id == topology_physical_package_id(i))
|
||||
return;
|
||||
}
|
||||
/* was not found, so add it */
|
||||
cpumask_set_cpu(cpu, &rapl_cpu_mask);
|
||||
}
|
||||
|
||||
static __init void rapl_hsw_server_quirk(void)
|
||||
{
|
||||
/*
|
||||
* DRAM domain on HSW server has fixed energy unit which can be
|
||||
* different than the unit from power unit MSR.
|
||||
* "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2
|
||||
* of 2. Datasheet, September 2014, Reference Number: 330784-001 "
|
||||
*/
|
||||
rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16;
|
||||
}
|
||||
|
||||
static int rapl_cpu_prepare(int cpu)
|
||||
{
|
||||
struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
|
||||
int phys_id = topology_physical_package_id(cpu);
|
||||
u64 ms;
|
||||
|
||||
if (pmu)
|
||||
return 0;
|
||||
|
||||
if (phys_id < 0)
|
||||
return -1;
|
||||
|
||||
pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
|
||||
if (!pmu)
|
||||
return -1;
|
||||
spin_lock_init(&pmu->lock);
|
||||
|
||||
INIT_LIST_HEAD(&pmu->active_list);
|
||||
|
||||
pmu->pmu = &rapl_pmu_class;
|
||||
|
||||
/*
|
||||
* use reference of 200W for scaling the timeout
|
||||
* to avoid missing counter overflows.
|
||||
* 200W = 200 Joules/sec
|
||||
* divide interval by 2 to avoid lockstep (2 * 100)
|
||||
* if hw unit is 32, then we use 2 ms 1/200/2
|
||||
*/
|
||||
if (rapl_hw_unit[0] < 32)
|
||||
ms = (1000 / (2 * 100)) * (1ULL << (32 - rapl_hw_unit[0] - 1));
|
||||
else
|
||||
ms = 2;
|
||||
|
||||
pmu->timer_interval = ms_to_ktime(ms);
|
||||
|
||||
rapl_hrtimer_init(pmu);
|
||||
|
||||
/* set RAPL pmu for this cpu for now */
|
||||
per_cpu(rapl_pmu, cpu) = pmu;
|
||||
per_cpu(rapl_pmu_to_free, cpu) = NULL;
|
||||
|
||||
return 0;
|
||||
}
|
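The timeout computed above can be sanity-checked by hand: with an energy unit of 2^-16 J a 32-bit counter overflows after 2^(32-16) = 65536 J, which at the 200 W reference is about 327 s, and the formula deliberately lands at roughly half of that. A quick check with an illustrative hw unit:

#include <stdio.h>

int main(void)
{
	int hw_unit = 16;	/* illustrative: counter counts 2^-16 J units */

	/* seconds until a 32-bit counter overflows at the 200 W reference */
	double overflow_s = (double)(1ULL << (32 - hw_unit)) / 200.0;

	/* the driver's own formula, in milliseconds, halved to avoid lockstep */
	unsigned long long ms = (1000 / (2 * 100)) * (1ULL << (32 - hw_unit - 1));

	printf("overflow after ~%.0f s, polling every %llu ms (~%.0f s)\n",
	       overflow_s, ms, ms / 1000.0);
	return 0;
}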
||||
|
||||
static void rapl_cpu_kfree(int cpu)
|
||||
{
|
||||
struct rapl_pmu *pmu = per_cpu(rapl_pmu_to_free, cpu);
|
||||
|
||||
kfree(pmu);
|
||||
|
||||
per_cpu(rapl_pmu_to_free, cpu) = NULL;
|
||||
}
|
||||
|
||||
static int rapl_cpu_dying(int cpu)
|
||||
{
|
||||
struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
|
||||
|
||||
if (!pmu)
|
||||
return 0;
|
||||
|
||||
per_cpu(rapl_pmu, cpu) = NULL;
|
||||
|
||||
per_cpu(rapl_pmu_to_free, cpu) = pmu;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int rapl_cpu_notifier(struct notifier_block *self,
|
||||
unsigned long action, void *hcpu)
|
||||
{
|
||||
unsigned int cpu = (long)hcpu;
|
||||
|
||||
switch (action & ~CPU_TASKS_FROZEN) {
|
||||
case CPU_UP_PREPARE:
|
||||
rapl_cpu_prepare(cpu);
|
||||
break;
|
||||
case CPU_STARTING:
|
||||
rapl_cpu_init(cpu);
|
||||
break;
|
||||
case CPU_UP_CANCELED:
|
||||
case CPU_DYING:
|
||||
rapl_cpu_dying(cpu);
|
||||
break;
|
||||
case CPU_ONLINE:
|
||||
case CPU_DEAD:
|
||||
rapl_cpu_kfree(cpu);
|
||||
break;
|
||||
case CPU_DOWN_PREPARE:
|
||||
rapl_cpu_exit(cpu);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
static int rapl_check_hw_unit(void)
|
||||
{
|
||||
u64 msr_rapl_power_unit_bits;
|
||||
int i;
|
||||
|
||||
/* protect rdmsrl() to handle virtualization */
|
||||
if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits))
|
||||
return -1;
|
||||
for (i = 0; i < NR_RAPL_DOMAINS; i++)
|
||||
rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
|
||||
|
||||
return 0;
|
||||
}
|
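rapl_check_hw_unit() extracts the Energy Status Units field from bits 12:8 of MSR_RAPL_POWER_UNIT; a counter increment is then 1/2^unit Joules. A small decoding example with a made-up MSR value whose energy field is 16, matching the 2^-16 J SandyBridge example from the header comment (compile with -lm):

#include <math.h>
#include <stdio.h>

int main(void)
{
	/* hypothetical MSR_RAPL_POWER_UNIT contents, energy field = 0x10 */
	unsigned long long msr = 0x000A1003ULL;
	int unit = (msr >> 8) & 0x1F;	/* Energy Status Units, bits 12:8 */

	printf("energy unit: 2^-%d J = %.3f uJ per count\n",
	       unit, ldexp(1.0, -unit) * 1e6);
	return 0;
}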
||||
|
||||
static const struct x86_cpu_id rapl_cpu_match[] = {
|
||||
[0] = { .vendor = X86_VENDOR_INTEL, .family = 6 },
|
||||
[1] = {},
|
||||
};
|
||||
|
||||
static int __init rapl_pmu_init(void)
|
||||
{
|
||||
struct rapl_pmu *pmu;
|
||||
int cpu, ret;
|
||||
struct x86_pmu_quirk *quirk;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* check for Intel processor family 6
|
||||
*/
|
||||
if (!x86_match_cpu(rapl_cpu_match))
|
||||
return 0;
|
||||
|
||||
/* check supported CPU */
|
||||
switch (boot_cpu_data.x86_model) {
|
||||
case 42: /* Sandy Bridge */
|
||||
case 58: /* Ivy Bridge */
|
||||
rapl_cntr_mask = RAPL_IDX_CLN;
|
||||
rapl_pmu_events_group.attrs = rapl_events_cln_attr;
|
||||
break;
|
||||
case 63: /* Haswell-Server */
|
||||
rapl_add_quirk(rapl_hsw_server_quirk);
|
||||
rapl_cntr_mask = RAPL_IDX_SRV;
|
||||
rapl_pmu_events_group.attrs = rapl_events_srv_attr;
|
||||
break;
|
||||
case 60: /* Haswell */
|
||||
case 69: /* Haswell-Celeron */
|
||||
case 61: /* Broadwell */
|
||||
rapl_cntr_mask = RAPL_IDX_HSW;
|
||||
rapl_pmu_events_group.attrs = rapl_events_hsw_attr;
|
||||
break;
|
||||
case 45: /* Sandy Bridge-EP */
|
||||
case 62: /* IvyTown */
|
||||
rapl_cntr_mask = RAPL_IDX_SRV;
|
||||
rapl_pmu_events_group.attrs = rapl_events_srv_attr;
|
||||
break;
|
||||
case 87: /* Knights Landing */
|
||||
rapl_add_quirk(rapl_hsw_server_quirk);
|
||||
rapl_cntr_mask = RAPL_IDX_KNL;
|
||||
rapl_pmu_events_group.attrs = rapl_events_knl_attr;
|
||||
|
||||
default:
|
||||
/* unsupported */
|
||||
return 0;
|
||||
}
|
||||
ret = rapl_check_hw_unit();
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* run cpu model quirks */
|
||||
for (quirk = rapl_quirks; quirk; quirk = quirk->next)
|
||||
quirk->func();
|
||||
cpu_notifier_register_begin();
|
||||
|
||||
for_each_online_cpu(cpu) {
|
||||
ret = rapl_cpu_prepare(cpu);
|
||||
if (ret)
|
||||
goto out;
|
||||
rapl_cpu_init(cpu);
|
||||
}
|
||||
|
||||
__perf_cpu_notifier(rapl_cpu_notifier);
|
||||
|
||||
ret = perf_pmu_register(&rapl_pmu_class, "power", -1);
|
||||
if (WARN_ON(ret)) {
|
||||
pr_info("RAPL PMU detected, registration failed (%d), RAPL PMU disabled\n", ret);
|
||||
cpu_notifier_register_done();
|
||||
return -1;
|
||||
}
|
||||
|
||||
pmu = __this_cpu_read(rapl_pmu);
|
||||
|
||||
pr_info("RAPL PMU detected,"
|
||||
" API unit is 2^-32 Joules,"
|
||||
" %d fixed counters"
|
||||
" %llu ms ovfl timer\n",
|
||||
hweight32(rapl_cntr_mask),
|
||||
ktime_to_ms(pmu->timer_interval));
|
||||
for (i = 0; i < NR_RAPL_DOMAINS; i++) {
|
||||
if (rapl_cntr_mask & (1 << i)) {
|
||||
pr_info("hw unit of domain %s 2^-%d Joules\n",
|
||||
rapl_domain_names[i], rapl_hw_unit[i]);
|
||||
}
|
||||
}
|
||||
out:
|
||||
cpu_notifier_register_done();
|
||||
|
||||
return 0;
|
||||
}
|
||||
device_initcall(rapl_pmu_init);
|
File diff suppressed because it is too large
Load Diff
@@ -1,357 +0,0 @@
|
||||
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include "perf_event.h"
|
||||
|
||||
#define UNCORE_PMU_NAME_LEN 32
|
||||
#define UNCORE_PMU_HRTIMER_INTERVAL (60LL * NSEC_PER_SEC)
|
||||
#define UNCORE_SNB_IMC_HRTIMER_INTERVAL (5ULL * NSEC_PER_SEC)
|
||||
|
||||
#define UNCORE_FIXED_EVENT 0xff
|
||||
#define UNCORE_PMC_IDX_MAX_GENERIC 8
|
||||
#define UNCORE_PMC_IDX_FIXED UNCORE_PMC_IDX_MAX_GENERIC
|
||||
#define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FIXED + 1)
|
||||
|
||||
#define UNCORE_PCI_DEV_DATA(type, idx) ((type << 8) | idx)
|
||||
#define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff)
|
||||
#define UNCORE_PCI_DEV_IDX(data) (data & 0xff)
|
||||
#define UNCORE_EXTRA_PCI_DEV 0xff
|
||||
#define UNCORE_EXTRA_PCI_DEV_MAX 3
|
||||
|
||||
/* support up to 8 sockets */
|
||||
#define UNCORE_SOCKET_MAX 8
|
||||
|
||||
#define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff)
|
||||
|
||||
struct intel_uncore_ops;
|
||||
struct intel_uncore_pmu;
|
||||
struct intel_uncore_box;
|
||||
struct uncore_event_desc;
|
||||
|
||||
struct intel_uncore_type {
|
||||
const char *name;
|
||||
int num_counters;
|
||||
int num_boxes;
|
||||
int perf_ctr_bits;
|
||||
int fixed_ctr_bits;
|
||||
unsigned perf_ctr;
|
||||
unsigned event_ctl;
|
||||
unsigned event_mask;
|
||||
unsigned fixed_ctr;
|
||||
unsigned fixed_ctl;
|
||||
unsigned box_ctl;
|
||||
unsigned msr_offset;
|
||||
unsigned num_shared_regs:8;
|
||||
unsigned single_fixed:1;
|
||||
unsigned pair_ctr_ctl:1;
|
||||
unsigned *msr_offsets;
|
||||
struct event_constraint unconstrainted;
|
||||
struct event_constraint *constraints;
|
||||
struct intel_uncore_pmu *pmus;
|
||||
struct intel_uncore_ops *ops;
|
||||
struct uncore_event_desc *event_descs;
|
||||
const struct attribute_group *attr_groups[4];
|
||||
struct pmu *pmu; /* for custom pmu ops */
|
||||
};
|
||||
|
||||
#define pmu_group attr_groups[0]
|
||||
#define format_group attr_groups[1]
|
||||
#define events_group attr_groups[2]
|
||||
|
||||
struct intel_uncore_ops {
|
||||
void (*init_box)(struct intel_uncore_box *);
|
||||
void (*disable_box)(struct intel_uncore_box *);
|
||||
void (*enable_box)(struct intel_uncore_box *);
|
||||
void (*disable_event)(struct intel_uncore_box *, struct perf_event *);
|
||||
void (*enable_event)(struct intel_uncore_box *, struct perf_event *);
|
||||
u64 (*read_counter)(struct intel_uncore_box *, struct perf_event *);
|
||||
int (*hw_config)(struct intel_uncore_box *, struct perf_event *);
|
||||
struct event_constraint *(*get_constraint)(struct intel_uncore_box *,
|
||||
struct perf_event *);
|
||||
void (*put_constraint)(struct intel_uncore_box *, struct perf_event *);
|
||||
};
|
||||
|
||||
struct intel_uncore_pmu {
|
||||
struct pmu pmu;
|
||||
char name[UNCORE_PMU_NAME_LEN];
|
||||
int pmu_idx;
|
||||
int func_id;
|
||||
struct intel_uncore_type *type;
|
||||
struct intel_uncore_box ** __percpu box;
|
||||
struct list_head box_list;
|
||||
};
|
||||
|
||||
struct intel_uncore_extra_reg {
|
||||
raw_spinlock_t lock;
|
||||
u64 config, config1, config2;
|
||||
atomic_t ref;
|
||||
};
|
||||
|
||||
struct intel_uncore_box {
|
||||
int phys_id;
|
||||
int n_active; /* number of active events */
|
||||
int n_events;
|
||||
int cpu; /* cpu to collect events */
|
||||
unsigned long flags;
|
||||
atomic_t refcnt;
|
||||
struct perf_event *events[UNCORE_PMC_IDX_MAX];
|
||||
struct perf_event *event_list[UNCORE_PMC_IDX_MAX];
|
||||
struct event_constraint *event_constraint[UNCORE_PMC_IDX_MAX];
|
||||
unsigned long active_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
|
||||
u64 tags[UNCORE_PMC_IDX_MAX];
|
||||
struct pci_dev *pci_dev;
|
||||
struct intel_uncore_pmu *pmu;
|
||||
u64 hrtimer_duration; /* hrtimer timeout for this box */
|
||||
struct hrtimer hrtimer;
|
||||
struct list_head list;
|
||||
struct list_head active_list;
|
||||
void *io_addr;
|
||||
struct intel_uncore_extra_reg shared_regs[0];
|
||||
};
|
||||
|
||||
#define UNCORE_BOX_FLAG_INITIATED 0
|
||||
|
||||
struct uncore_event_desc {
|
||||
struct kobj_attribute attr;
|
||||
const char *config;
|
||||
};
|
||||
|
||||
struct pci2phy_map {
|
||||
struct list_head list;
|
||||
int segment;
|
||||
int pbus_to_physid[256];
|
||||
};
|
||||
|
||||
int uncore_pcibus_to_physid(struct pci_bus *bus);
|
||||
struct pci2phy_map *__find_pci2phy_map(int segment);
|
||||
|
||||
ssize_t uncore_event_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf);
|
||||
|
||||
#define INTEL_UNCORE_EVENT_DESC(_name, _config) \
|
||||
{ \
|
||||
.attr = __ATTR(_name, 0444, uncore_event_show, NULL), \
|
||||
.config = _config, \
|
||||
}
|
||||
|
||||
#define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format) \
|
||||
static ssize_t __uncore_##_var##_show(struct kobject *kobj, \
|
||||
struct kobj_attribute *attr, \
|
||||
char *page) \
|
||||
{ \
|
||||
BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
|
||||
return sprintf(page, _format "\n"); \
|
||||
} \
|
||||
static struct kobj_attribute format_attr_##_var = \
|
||||
__ATTR(_name, 0444, __uncore_##_var##_show, NULL)
|
||||
|
||||
static inline unsigned uncore_pci_box_ctl(struct intel_uncore_box *box)
|
||||
{
|
||||
return box->pmu->type->box_ctl;
|
||||
}
|
||||
|
||||
static inline unsigned uncore_pci_fixed_ctl(struct intel_uncore_box *box)
|
||||
{
|
||||
return box->pmu->type->fixed_ctl;
|
||||
}
|
||||
|
||||
static inline unsigned uncore_pci_fixed_ctr(struct intel_uncore_box *box)
|
||||
{
|
||||
return box->pmu->type->fixed_ctr;
|
||||
}
|
||||
|
||||
static inline
|
||||
unsigned uncore_pci_event_ctl(struct intel_uncore_box *box, int idx)
|
||||
{
|
||||
return idx * 4 + box->pmu->type->event_ctl;
|
||||
}
|
||||
|
||||
static inline
|
||||
unsigned uncore_pci_perf_ctr(struct intel_uncore_box *box, int idx)
|
||||
{
|
||||
return idx * 8 + box->pmu->type->perf_ctr;
|
||||
}
|
||||
|
||||
static inline unsigned uncore_msr_box_offset(struct intel_uncore_box *box)
|
||||
{
|
||||
struct intel_uncore_pmu *pmu = box->pmu;
|
||||
return pmu->type->msr_offsets ?
|
||||
pmu->type->msr_offsets[pmu->pmu_idx] :
|
||||
pmu->type->msr_offset * pmu->pmu_idx;
|
||||
}
|
||||
|
||||
static inline unsigned uncore_msr_box_ctl(struct intel_uncore_box *box)
|
||||
{
|
||||
if (!box->pmu->type->box_ctl)
|
||||
return 0;
|
||||
return box->pmu->type->box_ctl + uncore_msr_box_offset(box);
|
||||
}
|
||||
|
||||
static inline unsigned uncore_msr_fixed_ctl(struct intel_uncore_box *box)
|
||||
{
|
||||
if (!box->pmu->type->fixed_ctl)
|
||||
return 0;
|
||||
return box->pmu->type->fixed_ctl + uncore_msr_box_offset(box);
|
||||
}
|
||||
|
||||
static inline unsigned uncore_msr_fixed_ctr(struct intel_uncore_box *box)
|
||||
{
|
||||
return box->pmu->type->fixed_ctr + uncore_msr_box_offset(box);
|
||||
}
|
||||
|
||||
static inline
|
||||
unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx)
|
||||
{
|
||||
return box->pmu->type->event_ctl +
|
||||
(box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) +
|
||||
uncore_msr_box_offset(box);
|
||||
}
|
||||
|
||||
static inline
|
||||
unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx)
|
||||
{
|
||||
return box->pmu->type->perf_ctr +
|
||||
(box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) +
|
||||
uncore_msr_box_offset(box);
|
||||
}
|
||||
|
||||
static inline
|
||||
unsigned uncore_fixed_ctl(struct intel_uncore_box *box)
|
||||
{
|
||||
if (box->pci_dev)
|
||||
return uncore_pci_fixed_ctl(box);
|
||||
else
|
||||
return uncore_msr_fixed_ctl(box);
|
||||
}
|
||||
|
||||
static inline
|
||||
unsigned uncore_fixed_ctr(struct intel_uncore_box *box)
|
||||
{
|
||||
if (box->pci_dev)
|
||||
return uncore_pci_fixed_ctr(box);
|
||||
else
|
||||
return uncore_msr_fixed_ctr(box);
|
||||
}
|
||||
|
||||
static inline
|
||||
unsigned uncore_event_ctl(struct intel_uncore_box *box, int idx)
|
||||
{
|
||||
if (box->pci_dev)
|
||||
return uncore_pci_event_ctl(box, idx);
|
||||
else
|
||||
return uncore_msr_event_ctl(box, idx);
|
||||
}
|
||||
|
||||
static inline
|
||||
unsigned uncore_perf_ctr(struct intel_uncore_box *box, int idx)
|
||||
{
|
||||
if (box->pci_dev)
|
||||
return uncore_pci_perf_ctr(box, idx);
|
||||
else
|
||||
return uncore_msr_perf_ctr(box, idx);
|
||||
}
|
||||
|
||||
static inline int uncore_perf_ctr_bits(struct intel_uncore_box *box)
|
||||
{
|
||||
return box->pmu->type->perf_ctr_bits;
|
||||
}
|
||||
|
||||
static inline int uncore_fixed_ctr_bits(struct intel_uncore_box *box)
|
||||
{
|
||||
return box->pmu->type->fixed_ctr_bits;
|
||||
}
|
||||
|
||||
static inline int uncore_num_counters(struct intel_uncore_box *box)
|
||||
{
|
||||
return box->pmu->type->num_counters;
|
||||
}
|
||||
|
||||
static inline void uncore_disable_box(struct intel_uncore_box *box)
|
||||
{
|
||||
if (box->pmu->type->ops->disable_box)
|
||||
box->pmu->type->ops->disable_box(box);
|
||||
}
|
||||
|
||||
static inline void uncore_enable_box(struct intel_uncore_box *box)
|
||||
{
|
||||
if (box->pmu->type->ops->enable_box)
|
||||
box->pmu->type->ops->enable_box(box);
|
||||
}
|
||||
|
||||
static inline void uncore_disable_event(struct intel_uncore_box *box,
|
||||
struct perf_event *event)
|
||||
{
|
||||
box->pmu->type->ops->disable_event(box, event);
|
||||
}
|
||||
|
||||
static inline void uncore_enable_event(struct intel_uncore_box *box,
|
||||
struct perf_event *event)
|
||||
{
|
||||
box->pmu->type->ops->enable_event(box, event);
|
||||
}
|
||||
|
||||
static inline u64 uncore_read_counter(struct intel_uncore_box *box,
|
||||
struct perf_event *event)
|
||||
{
|
||||
return box->pmu->type->ops->read_counter(box, event);
|
||||
}
|
||||
|
||||
static inline void uncore_box_init(struct intel_uncore_box *box)
|
||||
{
|
||||
if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) {
|
||||
if (box->pmu->type->ops->init_box)
|
||||
box->pmu->type->ops->init_box(box);
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool uncore_box_is_fake(struct intel_uncore_box *box)
|
||||
{
|
||||
return (box->phys_id < 0);
|
||||
}
|
||||
|
||||
struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event);
|
||||
struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu);
|
||||
struct intel_uncore_box *uncore_event_to_box(struct perf_event *event);
|
||||
u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event);
|
||||
void uncore_pmu_start_hrtimer(struct intel_uncore_box *box);
|
||||
void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box);
|
||||
void uncore_pmu_event_read(struct perf_event *event);
|
||||
void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event);
|
||||
struct event_constraint *
|
||||
uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event);
|
||||
void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event);
|
||||
u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx);
|
||||
|
||||
extern struct intel_uncore_type **uncore_msr_uncores;
|
||||
extern struct intel_uncore_type **uncore_pci_uncores;
|
||||
extern struct pci_driver *uncore_pci_driver;
|
||||
extern raw_spinlock_t pci2phy_map_lock;
|
||||
extern struct list_head pci2phy_map_head;
|
||||
extern struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];
|
||||
extern struct event_constraint uncore_constraint_empty;
|
||||
|
||||
/* perf_event_intel_uncore_snb.c */
|
||||
int snb_uncore_pci_init(void);
|
||||
int ivb_uncore_pci_init(void);
|
||||
int hsw_uncore_pci_init(void);
|
||||
int bdw_uncore_pci_init(void);
|
||||
int skl_uncore_pci_init(void);
|
||||
void snb_uncore_cpu_init(void);
|
||||
void nhm_uncore_cpu_init(void);
|
||||
int snb_pci2phy_map_init(int devid);
|
||||
|
||||
/* perf_event_intel_uncore_snbep.c */
|
||||
int snbep_uncore_pci_init(void);
|
||||
void snbep_uncore_cpu_init(void);
|
||||
int ivbep_uncore_pci_init(void);
|
||||
void ivbep_uncore_cpu_init(void);
|
||||
int hswep_uncore_pci_init(void);
|
||||
void hswep_uncore_cpu_init(void);
|
||||
int bdx_uncore_pci_init(void);
|
||||
void bdx_uncore_cpu_init(void);
|
||||
int knl_uncore_pci_init(void);
|
||||
void knl_uncore_cpu_init(void);
|
||||
|
||||
/* perf_event_intel_uncore_nhmex.c */
|
||||
void nhmex_uncore_cpu_init(void);
|
File diff suppressed because it is too large
Load Diff
@@ -1,717 +0,0 @@
|
||||
/* Nehalem/SandBridge/Haswell uncore support */
|
||||
#include "perf_event_intel_uncore.h"
|
||||
|
||||
/* Uncore IMC PCI IDs */
|
||||
#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100
|
||||
#define PCI_DEVICE_ID_INTEL_IVB_IMC 0x0154
|
||||
#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150
|
||||
#define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00
|
||||
#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04
|
||||
#define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604
|
||||
#define PCI_DEVICE_ID_INTEL_SKL_IMC 0x191f
|
||||
|
||||
/* SNB event control */
|
||||
#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff
|
||||
#define SNB_UNC_CTL_UMASK_MASK 0x0000ff00
|
||||
#define SNB_UNC_CTL_EDGE_DET (1 << 18)
|
||||
#define SNB_UNC_CTL_EN (1 << 22)
|
||||
#define SNB_UNC_CTL_INVERT (1 << 23)
|
||||
#define SNB_UNC_CTL_CMASK_MASK 0x1f000000
|
||||
#define NHM_UNC_CTL_CMASK_MASK 0xff000000
|
||||
#define NHM_UNC_FIXED_CTR_CTL_EN (1 << 0)
|
||||
|
||||
#define SNB_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \
|
||||
SNB_UNC_CTL_UMASK_MASK | \
|
||||
SNB_UNC_CTL_EDGE_DET | \
|
||||
SNB_UNC_CTL_INVERT | \
|
||||
SNB_UNC_CTL_CMASK_MASK)
|
||||
|
||||
#define NHM_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \
|
||||
SNB_UNC_CTL_UMASK_MASK | \
|
||||
SNB_UNC_CTL_EDGE_DET | \
|
||||
SNB_UNC_CTL_INVERT | \
|
||||
NHM_UNC_CTL_CMASK_MASK)
|
||||
|
||||
/* SNB global control register */
|
||||
#define SNB_UNC_PERF_GLOBAL_CTL 0x391
|
||||
#define SNB_UNC_FIXED_CTR_CTRL 0x394
|
||||
#define SNB_UNC_FIXED_CTR 0x395
|
||||
|
||||
/* SNB uncore global control */
|
||||
#define SNB_UNC_GLOBAL_CTL_CORE_ALL ((1 << 4) - 1)
|
||||
#define SNB_UNC_GLOBAL_CTL_EN (1 << 29)
|
||||
|
||||
/* SNB Cbo register */
|
||||
#define SNB_UNC_CBO_0_PERFEVTSEL0 0x700
|
||||
#define SNB_UNC_CBO_0_PER_CTR0 0x706
|
||||
#define SNB_UNC_CBO_MSR_OFFSET 0x10
|
||||
|
||||
/* SNB ARB register */
|
||||
#define SNB_UNC_ARB_PER_CTR0 0x3b0
|
||||
#define SNB_UNC_ARB_PERFEVTSEL0 0x3b2
|
||||
#define SNB_UNC_ARB_MSR_OFFSET 0x10
|
||||
|
||||
/* NHM global control register */
|
||||
#define NHM_UNC_PERF_GLOBAL_CTL 0x391
|
||||
#define NHM_UNC_FIXED_CTR 0x394
|
||||
#define NHM_UNC_FIXED_CTR_CTRL 0x395
|
||||
|
||||
/* NHM uncore global control */
|
||||
#define NHM_UNC_GLOBAL_CTL_EN_PC_ALL ((1ULL << 8) - 1)
|
||||
#define NHM_UNC_GLOBAL_CTL_EN_FC (1ULL << 32)
|
||||
|
||||
/* NHM uncore register */
|
||||
#define NHM_UNC_PERFEVTSEL0 0x3c0
|
||||
#define NHM_UNC_UNCORE_PMC0 0x3b0
|
||||
|
||||
DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
|
||||
DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
|
||||
DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
|
||||
DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
|
||||
DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28");
|
||||
DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31");
|
||||
|
||||
/* Sandy Bridge uncore support */
|
||||
static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
|
||||
if (hwc->idx < UNCORE_PMC_IDX_FIXED)
|
||||
wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN);
|
||||
else
|
||||
wrmsrl(hwc->config_base, SNB_UNC_CTL_EN);
|
||||
}
|
||||
|
||||
static void snb_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event)
|
||||
{
|
||||
wrmsrl(event->hw.config_base, 0);
|
||||
}
|
||||
|
||||
static void snb_uncore_msr_init_box(struct intel_uncore_box *box)
|
||||
{
|
||||
if (box->pmu->pmu_idx == 0) {
|
||||
wrmsrl(SNB_UNC_PERF_GLOBAL_CTL,
|
||||
SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL);
|
||||
}
|
||||
}
|
||||
|
||||
static struct uncore_event_desc snb_uncore_events[] = {
|
||||
INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
|
||||
{ /* end: all zeroes */ },
|
||||
};
|
||||
|
||||
static struct attribute *snb_uncore_formats_attr[] = {
|
||||
&format_attr_event.attr,
|
||||
&format_attr_umask.attr,
|
||||
&format_attr_edge.attr,
|
||||
&format_attr_inv.attr,
|
||||
&format_attr_cmask5.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group snb_uncore_format_group = {
|
||||
.name = "format",
|
||||
.attrs = snb_uncore_formats_attr,
|
||||
};
|
||||
|
||||
static struct intel_uncore_ops snb_uncore_msr_ops = {
|
||||
.init_box = snb_uncore_msr_init_box,
|
||||
.disable_event = snb_uncore_msr_disable_event,
|
||||
.enable_event = snb_uncore_msr_enable_event,
|
||||
.read_counter = uncore_msr_read_counter,
|
||||
};
|
||||
|
||||
static struct event_constraint snb_uncore_arb_constraints[] = {
|
||||
UNCORE_EVENT_CONSTRAINT(0x80, 0x1),
|
||||
UNCORE_EVENT_CONSTRAINT(0x83, 0x1),
|
||||
EVENT_CONSTRAINT_END
|
||||
};
|
||||
|
||||
static struct intel_uncore_type snb_uncore_cbox = {
|
||||
.name = "cbox",
|
||||
.num_counters = 2,
|
||||
.num_boxes = 4,
|
||||
.perf_ctr_bits = 44,
|
||||
.fixed_ctr_bits = 48,
|
||||
.perf_ctr = SNB_UNC_CBO_0_PER_CTR0,
|
||||
.event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0,
|
||||
.fixed_ctr = SNB_UNC_FIXED_CTR,
|
||||
.fixed_ctl = SNB_UNC_FIXED_CTR_CTRL,
|
||||
.single_fixed = 1,
|
||||
.event_mask = SNB_UNC_RAW_EVENT_MASK,
|
||||
.msr_offset = SNB_UNC_CBO_MSR_OFFSET,
|
||||
.ops = &snb_uncore_msr_ops,
|
||||
.format_group = &snb_uncore_format_group,
|
||||
.event_descs = snb_uncore_events,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type snb_uncore_arb = {
|
||||
.name = "arb",
|
||||
.num_counters = 2,
|
||||
.num_boxes = 1,
|
||||
.perf_ctr_bits = 44,
|
||||
.perf_ctr = SNB_UNC_ARB_PER_CTR0,
|
||||
.event_ctl = SNB_UNC_ARB_PERFEVTSEL0,
|
||||
.event_mask = SNB_UNC_RAW_EVENT_MASK,
|
||||
.msr_offset = SNB_UNC_ARB_MSR_OFFSET,
|
||||
.constraints = snb_uncore_arb_constraints,
|
||||
.ops = &snb_uncore_msr_ops,
|
||||
.format_group = &snb_uncore_format_group,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type *snb_msr_uncores[] = {
|
||||
&snb_uncore_cbox,
|
||||
&snb_uncore_arb,
|
||||
NULL,
|
||||
};
|
||||
|
||||
void snb_uncore_cpu_init(void)
|
||||
{
|
||||
uncore_msr_uncores = snb_msr_uncores;
|
||||
if (snb_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
|
||||
snb_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
|
||||
}
|
||||
|
||||
enum {
|
||||
SNB_PCI_UNCORE_IMC,
|
||||
};
|
||||
|
||||
static struct uncore_event_desc snb_uncore_imc_events[] = {
|
||||
INTEL_UNCORE_EVENT_DESC(data_reads, "event=0x01"),
|
||||
INTEL_UNCORE_EVENT_DESC(data_reads.scale, "6.103515625e-5"),
|
||||
INTEL_UNCORE_EVENT_DESC(data_reads.unit, "MiB"),
|
||||
|
||||
INTEL_UNCORE_EVENT_DESC(data_writes, "event=0x02"),
|
||||
INTEL_UNCORE_EVENT_DESC(data_writes.scale, "6.103515625e-5"),
|
||||
INTEL_UNCORE_EVENT_DESC(data_writes.unit, "MiB"),
|
||||
|
||||
{ /* end: all zeroes */ },
|
||||
};
|
||||
|
||||
#define SNB_UNCORE_PCI_IMC_EVENT_MASK 0xff
|
||||
#define SNB_UNCORE_PCI_IMC_BAR_OFFSET 0x48
|
||||
|
||||
/* page size multiple covering all config regs */
|
||||
#define SNB_UNCORE_PCI_IMC_MAP_SIZE 0x6000
|
||||
|
||||
#define SNB_UNCORE_PCI_IMC_DATA_READS 0x1
|
||||
#define SNB_UNCORE_PCI_IMC_DATA_READS_BASE 0x5050
|
||||
#define SNB_UNCORE_PCI_IMC_DATA_WRITES 0x2
|
||||
#define SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE 0x5054
|
||||
#define SNB_UNCORE_PCI_IMC_CTR_BASE SNB_UNCORE_PCI_IMC_DATA_READS_BASE
|
||||
|
||||
static struct attribute *snb_uncore_imc_formats_attr[] = {
|
||||
&format_attr_event.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group snb_uncore_imc_format_group = {
|
||||
.name = "format",
|
||||
.attrs = snb_uncore_imc_formats_attr,
|
||||
};
|
||||
|
||||
static void snb_uncore_imc_init_box(struct intel_uncore_box *box)
|
||||
{
|
||||
struct pci_dev *pdev = box->pci_dev;
|
||||
int where = SNB_UNCORE_PCI_IMC_BAR_OFFSET;
|
||||
resource_size_t addr;
|
||||
u32 pci_dword;
|
||||
|
||||
pci_read_config_dword(pdev, where, &pci_dword);
|
||||
addr = pci_dword;
|
||||
|
||||
#ifdef CONFIG_PHYS_ADDR_T_64BIT
|
||||
pci_read_config_dword(pdev, where + 4, &pci_dword);
|
||||
addr |= ((resource_size_t)pci_dword << 32);
|
||||
#endif
|
||||
|
||||
addr &= ~(PAGE_SIZE - 1);
|
||||
|
||||
box->io_addr = ioremap(addr, SNB_UNCORE_PCI_IMC_MAP_SIZE);
|
||||
box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL;
|
||||
}
|
||||
|
||||
static void snb_uncore_imc_enable_box(struct intel_uncore_box *box)
|
||||
{}
|
||||
|
||||
static void snb_uncore_imc_disable_box(struct intel_uncore_box *box)
|
||||
{}
|
||||
|
||||
static void snb_uncore_imc_enable_event(struct intel_uncore_box *box, struct perf_event *event)
|
||||
{}
|
||||
|
||||
static void snb_uncore_imc_disable_event(struct intel_uncore_box *box, struct perf_event *event)
|
||||
{}
|
||||
|
||||
static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
|
||||
return (u64)*(unsigned int *)(box->io_addr + hwc->event_base);
|
||||
}
|
||||
|
||||
/*
|
||||
* custom event_init() function because we define our own fixed, free
|
||||
* running counters, so we do not want to conflict with generic uncore
|
||||
* logic. Also simplifies processing
|
||||
*/
|
||||
static int snb_uncore_imc_event_init(struct perf_event *event)
|
||||
{
|
||||
struct intel_uncore_pmu *pmu;
|
||||
struct intel_uncore_box *box;
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
u64 cfg = event->attr.config & SNB_UNCORE_PCI_IMC_EVENT_MASK;
|
||||
int idx, base;
|
||||
|
||||
if (event->attr.type != event->pmu->type)
|
||||
return -ENOENT;
|
||||
|
||||
pmu = uncore_event_to_pmu(event);
|
||||
/* no device found for this pmu */
|
||||
if (pmu->func_id < 0)
|
||||
return -ENOENT;
|
||||
|
||||
/* Sampling not supported yet */
|
||||
if (hwc->sample_period)
|
||||
return -EINVAL;
|
||||
|
||||
/* unsupported modes and filters */
|
||||
if (event->attr.exclude_user ||
|
||||
event->attr.exclude_kernel ||
|
||||
event->attr.exclude_hv ||
|
||||
event->attr.exclude_idle ||
|
||||
event->attr.exclude_host ||
|
||||
event->attr.exclude_guest ||
|
||||
event->attr.sample_period) /* no sampling */
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* Place all uncore events for a particular physical package
|
||||
* onto a single cpu
|
||||
*/
|
||||
if (event->cpu < 0)
|
||||
return -EINVAL;
|
||||
|
||||
/* check only supported bits are set */
|
||||
if (event->attr.config & ~SNB_UNCORE_PCI_IMC_EVENT_MASK)
|
||||
return -EINVAL;
|
||||
|
||||
box = uncore_pmu_to_box(pmu, event->cpu);
|
||||
if (!box || box->cpu < 0)
|
||||
return -EINVAL;
|
||||
|
||||
event->cpu = box->cpu;
|
||||
|
||||
event->hw.idx = -1;
|
||||
event->hw.last_tag = ~0ULL;
|
||||
event->hw.extra_reg.idx = EXTRA_REG_NONE;
|
||||
event->hw.branch_reg.idx = EXTRA_REG_NONE;
|
||||
/*
|
||||
* check event is known (whitelist, determines counter)
|
||||
*/
|
||||
switch (cfg) {
|
||||
case SNB_UNCORE_PCI_IMC_DATA_READS:
|
||||
base = SNB_UNCORE_PCI_IMC_DATA_READS_BASE;
|
||||
idx = UNCORE_PMC_IDX_FIXED;
|
||||
break;
|
||||
case SNB_UNCORE_PCI_IMC_DATA_WRITES:
|
||||
base = SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE;
|
||||
idx = UNCORE_PMC_IDX_FIXED + 1;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* must be done before validate_group */
|
||||
event->hw.event_base = base;
|
||||
event->hw.config = cfg;
|
||||
event->hw.idx = idx;
|
||||
|
||||
/* no group validation needed, we have free running counters */
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int snb_uncore_imc_hw_config(struct intel_uncore_box *box, struct perf_event *event)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void snb_uncore_imc_event_start(struct perf_event *event, int flags)
|
||||
{
|
||||
struct intel_uncore_box *box = uncore_event_to_box(event);
|
||||
u64 count;
|
||||
|
||||
if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
|
||||
return;
|
||||
|
||||
event->hw.state = 0;
|
||||
box->n_active++;
|
||||
|
||||
list_add_tail(&event->active_entry, &box->active_list);
|
||||
|
||||
count = snb_uncore_imc_read_counter(box, event);
|
||||
local64_set(&event->hw.prev_count, count);
|
||||
|
||||
if (box->n_active == 1)
|
||||
uncore_pmu_start_hrtimer(box);
|
||||
}
|
||||
|
||||
static void snb_uncore_imc_event_stop(struct perf_event *event, int flags)
|
||||
{
|
||||
struct intel_uncore_box *box = uncore_event_to_box(event);
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
|
||||
if (!(hwc->state & PERF_HES_STOPPED)) {
|
||||
box->n_active--;
|
||||
|
||||
WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
|
||||
hwc->state |= PERF_HES_STOPPED;
|
||||
|
||||
list_del(&event->active_entry);
|
||||
|
||||
if (box->n_active == 0)
|
||||
uncore_pmu_cancel_hrtimer(box);
|
||||
}
|
||||
|
||||
if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
|
||||
/*
|
||||
* Drain the remaining delta count out of a event
|
||||
* that we are disabling:
|
||||
*/
|
||||
uncore_perf_event_update(box, event);
|
||||
hwc->state |= PERF_HES_UPTODATE;
|
||||
}
|
||||
}
|
||||
|
||||
static int snb_uncore_imc_event_add(struct perf_event *event, int flags)
|
||||
{
|
||||
struct intel_uncore_box *box = uncore_event_to_box(event);
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
|
||||
if (!box)
|
||||
return -ENODEV;
|
||||
|
||||
hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
|
||||
if (!(flags & PERF_EF_START))
|
||||
hwc->state |= PERF_HES_ARCH;
|
||||
|
||||
snb_uncore_imc_event_start(event, 0);
|
||||
|
||||
box->n_events++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void snb_uncore_imc_event_del(struct perf_event *event, int flags)
|
||||
{
|
||||
struct intel_uncore_box *box = uncore_event_to_box(event);
|
||||
int i;
|
||||
|
||||
snb_uncore_imc_event_stop(event, PERF_EF_UPDATE);
|
||||
|
||||
for (i = 0; i < box->n_events; i++) {
|
||||
if (event == box->event_list[i]) {
|
||||
--box->n_events;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int snb_pci2phy_map_init(int devid)
|
||||
{
|
||||
struct pci_dev *dev = NULL;
|
||||
struct pci2phy_map *map;
|
||||
int bus, segment;
|
||||
|
||||
dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, dev);
|
||||
if (!dev)
|
||||
return -ENOTTY;
|
||||
|
||||
bus = dev->bus->number;
|
||||
segment = pci_domain_nr(dev->bus);
|
||||
|
||||
raw_spin_lock(&pci2phy_map_lock);
|
||||
map = __find_pci2phy_map(segment);
|
||||
if (!map) {
|
||||
raw_spin_unlock(&pci2phy_map_lock);
|
||||
pci_dev_put(dev);
|
||||
return -ENOMEM;
|
||||
}
|
||||
map->pbus_to_physid[bus] = 0;
|
||||
raw_spin_unlock(&pci2phy_map_lock);
|
||||
|
||||
pci_dev_put(dev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct pmu snb_uncore_imc_pmu = {
|
||||
.task_ctx_nr = perf_invalid_context,
|
||||
.event_init = snb_uncore_imc_event_init,
|
||||
.add = snb_uncore_imc_event_add,
|
||||
.del = snb_uncore_imc_event_del,
|
||||
.start = snb_uncore_imc_event_start,
|
||||
.stop = snb_uncore_imc_event_stop,
|
||||
.read = uncore_pmu_event_read,
|
||||
};
|
||||
|
||||
static struct intel_uncore_ops snb_uncore_imc_ops = {
|
||||
.init_box = snb_uncore_imc_init_box,
|
||||
.enable_box = snb_uncore_imc_enable_box,
|
||||
.disable_box = snb_uncore_imc_disable_box,
|
||||
.disable_event = snb_uncore_imc_disable_event,
|
||||
.enable_event = snb_uncore_imc_enable_event,
|
||||
.hw_config = snb_uncore_imc_hw_config,
|
||||
.read_counter = snb_uncore_imc_read_counter,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type snb_uncore_imc = {
|
||||
.name = "imc",
|
||||
.num_counters = 2,
|
||||
.num_boxes = 1,
|
||||
.fixed_ctr_bits = 32,
|
||||
.fixed_ctr = SNB_UNCORE_PCI_IMC_CTR_BASE,
|
||||
.event_descs = snb_uncore_imc_events,
|
||||
.format_group = &snb_uncore_imc_format_group,
|
||||
.perf_ctr = SNB_UNCORE_PCI_IMC_DATA_READS_BASE,
|
||||
.event_mask = SNB_UNCORE_PCI_IMC_EVENT_MASK,
|
||||
.ops = &snb_uncore_imc_ops,
|
||||
.pmu = &snb_uncore_imc_pmu,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type *snb_pci_uncores[] = {
|
||||
[SNB_PCI_UNCORE_IMC] = &snb_uncore_imc,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static const struct pci_device_id snb_uncore_pci_ids[] = {
|
||||
{ /* IMC */
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SNB_IMC),
|
||||
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
|
||||
},
|
||||
{ /* end: all zeroes */ },
|
||||
};
|
||||
|
||||
static const struct pci_device_id ivb_uncore_pci_ids[] = {
|
||||
{ /* IMC */
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_IMC),
|
||||
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
|
||||
},
|
||||
{ /* IMC */
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_E3_IMC),
|
||||
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
|
||||
},
|
||||
{ /* end: all zeroes */ },
|
||||
};
|
||||
|
||||
static const struct pci_device_id hsw_uncore_pci_ids[] = {
|
||||
{ /* IMC */
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC),
|
||||
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
|
||||
},
|
||||
{ /* IMC */
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_U_IMC),
|
||||
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
|
||||
},
|
||||
{ /* end: all zeroes */ },
|
||||
};
|
||||
|
||||
static const struct pci_device_id bdw_uncore_pci_ids[] = {
|
||||
{ /* IMC */
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_IMC),
|
||||
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
|
||||
},
|
||||
{ /* end: all zeroes */ },
|
||||
};
|
||||
|
||||
static const struct pci_device_id skl_uncore_pci_ids[] = {
|
||||
{ /* IMC */
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_IMC),
|
||||
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
|
||||
},
|
||||
{ /* end: all zeroes */ },
|
||||
};
|
||||
|
||||
static struct pci_driver snb_uncore_pci_driver = {
|
||||
.name = "snb_uncore",
|
||||
.id_table = snb_uncore_pci_ids,
|
||||
};
|
||||
|
||||
static struct pci_driver ivb_uncore_pci_driver = {
|
||||
.name = "ivb_uncore",
|
||||
.id_table = ivb_uncore_pci_ids,
|
||||
};
|
||||
|
||||
static struct pci_driver hsw_uncore_pci_driver = {
|
||||
.name = "hsw_uncore",
|
||||
.id_table = hsw_uncore_pci_ids,
|
||||
};
|
||||
|
||||
static struct pci_driver bdw_uncore_pci_driver = {
|
||||
.name = "bdw_uncore",
|
||||
.id_table = bdw_uncore_pci_ids,
|
||||
};
|
||||
|
||||
static struct pci_driver skl_uncore_pci_driver = {
|
||||
.name = "skl_uncore",
|
||||
.id_table = skl_uncore_pci_ids,
|
||||
};
|
||||
|
||||
struct imc_uncore_pci_dev {
|
||||
__u32 pci_id;
|
||||
struct pci_driver *driver;
|
||||
};
|
||||
#define IMC_DEV(a, d) \
|
||||
{ .pci_id = PCI_DEVICE_ID_INTEL_##a, .driver = (d) }
|
||||
|
||||
static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
|
||||
IMC_DEV(SNB_IMC, &snb_uncore_pci_driver),
|
||||
IMC_DEV(IVB_IMC, &ivb_uncore_pci_driver), /* 3rd Gen Core processor */
|
||||
IMC_DEV(IVB_E3_IMC, &ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */
|
||||
IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core Processor */
|
||||
IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */
|
||||
IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver), /* 5th Gen Core U */
|
||||
IMC_DEV(SKL_IMC, &skl_uncore_pci_driver), /* 6th Gen Core */
|
||||
{ /* end marker */ }
|
||||
};
|
||||
|
||||
|
||||
#define for_each_imc_pci_id(x, t) \
|
||||
for (x = (t); (x)->pci_id; x++)
|
||||
|
||||
static struct pci_driver *imc_uncore_find_dev(void)
|
||||
{
|
||||
const struct imc_uncore_pci_dev *p;
|
||||
int ret;
|
||||
|
||||
for_each_imc_pci_id(p, desktop_imc_pci_ids) {
|
||||
ret = snb_pci2phy_map_init(p->pci_id);
|
||||
if (ret == 0)
|
||||
return p->driver;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int imc_uncore_pci_init(void)
|
||||
{
|
||||
struct pci_driver *imc_drv = imc_uncore_find_dev();
|
||||
|
||||
if (!imc_drv)
|
||||
return -ENODEV;
|
||||
|
||||
uncore_pci_uncores = snb_pci_uncores;
|
||||
uncore_pci_driver = imc_drv;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int snb_uncore_pci_init(void)
|
||||
{
|
||||
return imc_uncore_pci_init();
|
||||
}
|
||||
|
||||
int ivb_uncore_pci_init(void)
|
||||
{
|
||||
return imc_uncore_pci_init();
|
||||
}
|
||||
int hsw_uncore_pci_init(void)
|
||||
{
|
||||
return imc_uncore_pci_init();
|
||||
}
|
||||
|
||||
int bdw_uncore_pci_init(void)
|
||||
{
|
||||
return imc_uncore_pci_init();
|
||||
}
|
||||
|
||||
int skl_uncore_pci_init(void)
|
||||
{
|
||||
return imc_uncore_pci_init();
|
||||
}
|
||||
|
||||
/* end of Sandy Bridge uncore support */
|
||||
|
||||
/* Nehalem uncore support */
|
||||
static void nhm_uncore_msr_disable_box(struct intel_uncore_box *box)
|
||||
{
|
||||
wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, 0);
|
||||
}
|
||||
|
||||
static void nhm_uncore_msr_enable_box(struct intel_uncore_box *box)
|
||||
{
|
||||
wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC);
|
||||
}
|
||||
|
||||
static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
|
||||
if (hwc->idx < UNCORE_PMC_IDX_FIXED)
|
||||
wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN);
|
||||
else
|
||||
wrmsrl(hwc->config_base, NHM_UNC_FIXED_CTR_CTL_EN);
|
||||
}
|
||||
|
||||
static struct attribute *nhm_uncore_formats_attr[] = {
|
||||
&format_attr_event.attr,
|
||||
&format_attr_umask.attr,
|
||||
&format_attr_edge.attr,
|
||||
&format_attr_inv.attr,
|
||||
&format_attr_cmask8.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group nhm_uncore_format_group = {
|
||||
.name = "format",
|
||||
.attrs = nhm_uncore_formats_attr,
|
||||
};
|
||||
|
||||
static struct uncore_event_desc nhm_uncore_events[] = {
|
||||
INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
|
||||
INTEL_UNCORE_EVENT_DESC(qmc_writes_full_any, "event=0x2f,umask=0x0f"),
|
||||
INTEL_UNCORE_EVENT_DESC(qmc_normal_reads_any, "event=0x2c,umask=0x0f"),
|
||||
INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_reads, "event=0x20,umask=0x01"),
|
||||
INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_writes, "event=0x20,umask=0x02"),
|
||||
INTEL_UNCORE_EVENT_DESC(qhl_request_remote_reads, "event=0x20,umask=0x04"),
|
||||
INTEL_UNCORE_EVENT_DESC(qhl_request_remote_writes, "event=0x20,umask=0x08"),
|
||||
INTEL_UNCORE_EVENT_DESC(qhl_request_local_reads, "event=0x20,umask=0x10"),
|
||||
INTEL_UNCORE_EVENT_DESC(qhl_request_local_writes, "event=0x20,umask=0x20"),
|
||||
{ /* end: all zeroes */ },
|
||||
};
|
||||
|
||||
static struct intel_uncore_ops nhm_uncore_msr_ops = {
|
||||
.disable_box = nhm_uncore_msr_disable_box,
|
||||
.enable_box = nhm_uncore_msr_enable_box,
|
||||
.disable_event = snb_uncore_msr_disable_event,
|
||||
.enable_event = nhm_uncore_msr_enable_event,
|
||||
.read_counter = uncore_msr_read_counter,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type nhm_uncore = {
|
||||
.name = "",
|
||||
.num_counters = 8,
|
||||
.num_boxes = 1,
|
||||
.perf_ctr_bits = 48,
|
||||
.fixed_ctr_bits = 48,
|
||||
.event_ctl = NHM_UNC_PERFEVTSEL0,
|
||||
.perf_ctr = NHM_UNC_UNCORE_PMC0,
|
||||
.fixed_ctr = NHM_UNC_FIXED_CTR,
|
||||
.fixed_ctl = NHM_UNC_FIXED_CTR_CTRL,
|
||||
.event_mask = NHM_UNC_RAW_EVENT_MASK,
|
||||
.event_descs = nhm_uncore_events,
|
||||
.ops = &nhm_uncore_msr_ops,
|
||||
.format_group = &nhm_uncore_format_group,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type *nhm_msr_uncores[] = {
|
||||
&nhm_uncore,
|
||||
NULL,
|
||||
};
|
||||
|
||||
void nhm_uncore_cpu_init(void)
|
||||
{
|
||||
uncore_msr_uncores = nhm_msr_uncores;
|
||||
}
|
||||
|
||||
/* end of Nehalem uncore support */
|
File diff suppressed because it is too large
Load Diff
@@ -1,319 +0,0 @@
|
||||
/* Driver for Intel Xeon Phi "Knights Corner" PMU */
|
||||
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#include <asm/hardirq.h>
|
||||
|
||||
#include "perf_event.h"
|
||||
|
||||
static const u64 knc_perfmon_event_map[] =
|
||||
{
|
||||
[PERF_COUNT_HW_CPU_CYCLES] = 0x002a,
|
||||
[PERF_COUNT_HW_INSTRUCTIONS] = 0x0016,
|
||||
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x0028,
|
||||
[PERF_COUNT_HW_CACHE_MISSES] = 0x0029,
|
||||
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0012,
|
||||
[PERF_COUNT_HW_BRANCH_MISSES] = 0x002b,
|
||||
};
|
||||
|
||||
static const u64 __initconst knc_hw_cache_event_ids
|
||||
[PERF_COUNT_HW_CACHE_MAX]
|
||||
[PERF_COUNT_HW_CACHE_OP_MAX]
|
||||
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
|
||||
{
|
||||
[ C(L1D) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
/* On Xeon Phi event "0" is a valid DATA_READ */
|
||||
/* (L1 Data Cache Reads) Instruction. */
|
||||
/* We code this as ARCH_PERFMON_EVENTSEL_INT as this */
|
||||
/* bit will always be set in x86_pmu_hw_config(). */
|
||||
[ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
|
||||
/* DATA_READ */
|
||||
[ C(RESULT_MISS) ] = 0x0003, /* DATA_READ_MISS */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */
|
||||
[ C(RESULT_MISS) ] = 0x0004, /* DATA_WRITE_MISS */
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0011, /* L1_DATA_PF1 */
|
||||
[ C(RESULT_MISS) ] = 0x001c, /* L1_DATA_PF1_MISS */
|
||||
},
|
||||
},
|
||||
[ C(L1I ) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */
|
||||
[ C(RESULT_MISS) ] = 0x000e, /* CODE_CACHE_MISS */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0,
|
||||
[ C(RESULT_MISS) ] = 0x0,
|
||||
},
|
||||
},
|
||||
[ C(LL ) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0,
|
||||
[ C(RESULT_MISS) ] = 0x10cb, /* L2_READ_MISS */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x10cc, /* L2_WRITE_HIT */
|
||||
[ C(RESULT_MISS) ] = 0,
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x10fc, /* L2_DATA_PF2 */
|
||||
[ C(RESULT_MISS) ] = 0x10fe, /* L2_DATA_PF2_MISS */
|
||||
},
|
||||
},
|
||||
[ C(DTLB) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
|
||||
/* DATA_READ */
|
||||
/* see note on L1 OP_READ */
|
||||
[ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */
|
||||
[ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0,
|
||||
[ C(RESULT_MISS) ] = 0x0,
|
||||
},
|
||||
},
|
||||
[ C(ITLB) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */
|
||||
[ C(RESULT_MISS) ] = 0x000d, /* CODE_PAGE_WALK */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
},
|
||||
[ C(BPU ) ] = {
|
||||
[ C(OP_READ) ] = {
|
||||
[ C(RESULT_ACCESS) ] = 0x0012, /* BRANCHES */
|
||||
[ C(RESULT_MISS) ] = 0x002b, /* BRANCHES_MISPREDICTED */
|
||||
},
|
||||
[ C(OP_WRITE) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
[ C(OP_PREFETCH) ] = {
|
||||
[ C(RESULT_ACCESS) ] = -1,
|
||||
[ C(RESULT_MISS) ] = -1,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
static u64 knc_pmu_event_map(int hw_event)
|
||||
{
|
||||
return knc_perfmon_event_map[hw_event];
|
||||
}
|
||||
|
||||
static struct event_constraint knc_event_constraints[] =
|
||||
{
|
||||
INTEL_EVENT_CONSTRAINT(0xc3, 0x1), /* HWP_L2HIT */
|
||||
INTEL_EVENT_CONSTRAINT(0xc4, 0x1), /* HWP_L2MISS */
|
||||
INTEL_EVENT_CONSTRAINT(0xc8, 0x1), /* L2_READ_HIT_E */
|
||||
INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* L2_READ_HIT_M */
|
||||
INTEL_EVENT_CONSTRAINT(0xca, 0x1), /* L2_READ_HIT_S */
|
||||
INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* L2_READ_MISS */
|
||||
INTEL_EVENT_CONSTRAINT(0xcc, 0x1), /* L2_WRITE_HIT */
|
||||
INTEL_EVENT_CONSTRAINT(0xce, 0x1), /* L2_STRONGLY_ORDERED_STREAMING_VSTORES_MISS */
|
||||
INTEL_EVENT_CONSTRAINT(0xcf, 0x1), /* L2_WEAKLY_ORDERED_STREAMING_VSTORE_MISS */
|
||||
INTEL_EVENT_CONSTRAINT(0xd7, 0x1), /* L2_VICTIM_REQ_WITH_DATA */
|
||||
INTEL_EVENT_CONSTRAINT(0xe3, 0x1), /* SNP_HITM_BUNIT */
|
||||
INTEL_EVENT_CONSTRAINT(0xe6, 0x1), /* SNP_HIT_L2 */
|
||||
INTEL_EVENT_CONSTRAINT(0xe7, 0x1), /* SNP_HITM_L2 */
|
||||
INTEL_EVENT_CONSTRAINT(0xf1, 0x1), /* L2_DATA_READ_MISS_CACHE_FILL */
|
||||
INTEL_EVENT_CONSTRAINT(0xf2, 0x1), /* L2_DATA_WRITE_MISS_CACHE_FILL */
|
||||
INTEL_EVENT_CONSTRAINT(0xf6, 0x1), /* L2_DATA_READ_MISS_MEM_FILL */
|
||||
INTEL_EVENT_CONSTRAINT(0xf7, 0x1), /* L2_DATA_WRITE_MISS_MEM_FILL */
|
||||
INTEL_EVENT_CONSTRAINT(0xfc, 0x1), /* L2_DATA_PF2 */
|
||||
INTEL_EVENT_CONSTRAINT(0xfd, 0x1), /* L2_DATA_PF2_DROP */
|
||||
INTEL_EVENT_CONSTRAINT(0xfe, 0x1), /* L2_DATA_PF2_MISS */
|
||||
INTEL_EVENT_CONSTRAINT(0xff, 0x1), /* L2_DATA_HIT_INFLIGHT_PF2 */
|
||||
EVENT_CONSTRAINT_END
|
||||
};
|
||||
|
||||
#define MSR_KNC_IA32_PERF_GLOBAL_STATUS 0x0000002d
|
||||
#define MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL 0x0000002e
|
||||
#define MSR_KNC_IA32_PERF_GLOBAL_CTRL 0x0000002f
|
||||
|
||||
#define KNC_ENABLE_COUNTER0 0x00000001
|
||||
#define KNC_ENABLE_COUNTER1 0x00000002
|
||||
|
||||
static void knc_pmu_disable_all(void)
|
||||
{
|
||||
u64 val;
|
||||
|
||||
rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
|
||||
val &= ~(KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
|
||||
wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
|
||||
}
|
||||
|
||||
static void knc_pmu_enable_all(int added)
|
||||
{
|
||||
u64 val;
|
||||
|
||||
rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
|
||||
val |= (KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
|
||||
wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
|
||||
}
|
||||
|
||||
static inline void
|
||||
knc_pmu_disable_event(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
u64 val;
|
||||
|
||||
val = hwc->config;
|
||||
val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
|
||||
|
||||
(void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
|
||||
}
|
||||
|
||||
static void knc_pmu_enable_event(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
u64 val;
|
||||
|
||||
val = hwc->config;
|
||||
val |= ARCH_PERFMON_EVENTSEL_ENABLE;
|
||||
|
||||
(void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
|
||||
}
|
||||
|
||||
static inline u64 knc_pmu_get_status(void)
|
||||
{
|
||||
u64 status;
|
||||
|
||||
rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_STATUS, status);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static inline void knc_pmu_ack_status(u64 ack)
|
||||
{
|
||||
wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL, ack);
|
||||
}
|
||||
|
||||
static int knc_pmu_handle_irq(struct pt_regs *regs)
|
||||
{
|
||||
struct perf_sample_data data;
|
||||
struct cpu_hw_events *cpuc;
|
||||
int handled = 0;
|
||||
int bit, loops;
|
||||
u64 status;
|
||||
|
||||
cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
|
||||
knc_pmu_disable_all();
|
||||
|
||||
status = knc_pmu_get_status();
|
||||
if (!status) {
|
||||
knc_pmu_enable_all(0);
|
||||
return handled;
|
||||
}
|
||||
|
||||
loops = 0;
|
||||
again:
|
||||
knc_pmu_ack_status(status);
|
||||
if (++loops > 100) {
|
||||
WARN_ONCE(1, "perf: irq loop stuck!\n");
|
||||
perf_event_print_debug();
|
||||
goto done;
|
||||
}
|
||||
|
||||
inc_irq_stat(apic_perf_irqs);
|
||||
|
||||
for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
|
||||
struct perf_event *event = cpuc->events[bit];
|
||||
|
||||
handled++;
|
||||
|
||||
if (!test_bit(bit, cpuc->active_mask))
|
||||
continue;
|
||||
|
||||
if (!intel_pmu_save_and_restart(event))
|
||||
continue;
|
||||
|
||||
perf_sample_data_init(&data, 0, event->hw.last_period);
|
||||
|
||||
if (perf_event_overflow(event, &data, regs))
|
||||
x86_pmu_stop(event, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Repeat if there is more work to be done:
|
||||
*/
|
||||
status = knc_pmu_get_status();
|
||||
if (status)
|
||||
goto again;
|
||||
|
||||
done:
|
||||
knc_pmu_enable_all(0);
|
||||
|
||||
return handled;
|
||||
}
|
||||
|
||||
|
||||
PMU_FORMAT_ATTR(event, "config:0-7" );
|
||||
PMU_FORMAT_ATTR(umask, "config:8-15" );
|
||||
PMU_FORMAT_ATTR(edge, "config:18" );
|
||||
PMU_FORMAT_ATTR(inv, "config:23" );
|
||||
PMU_FORMAT_ATTR(cmask, "config:24-31" );
|
||||
|
||||
static struct attribute *intel_knc_formats_attr[] = {
|
||||
&format_attr_event.attr,
|
||||
&format_attr_umask.attr,
|
||||
&format_attr_edge.attr,
|
||||
&format_attr_inv.attr,
|
||||
&format_attr_cmask.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static const struct x86_pmu knc_pmu __initconst = {
|
||||
.name = "knc",
|
||||
.handle_irq = knc_pmu_handle_irq,
|
||||
.disable_all = knc_pmu_disable_all,
|
||||
.enable_all = knc_pmu_enable_all,
|
||||
.enable = knc_pmu_enable_event,
|
||||
.disable = knc_pmu_disable_event,
|
||||
.hw_config = x86_pmu_hw_config,
|
||||
.schedule_events = x86_schedule_events,
|
||||
.eventsel = MSR_KNC_EVNTSEL0,
|
||||
.perfctr = MSR_KNC_PERFCTR0,
|
||||
.event_map = knc_pmu_event_map,
|
||||
.max_events = ARRAY_SIZE(knc_perfmon_event_map),
|
||||
.apic = 1,
|
||||
.max_period = (1ULL << 39) - 1,
|
||||
.version = 0,
|
||||
.num_counters = 2,
|
||||
.cntval_bits = 40,
|
||||
.cntval_mask = (1ULL << 40) - 1,
|
||||
.get_event_constraints = x86_get_event_constraints,
|
||||
.event_constraints = knc_event_constraints,
|
||||
.format_attrs = intel_knc_formats_attr,
|
||||
};
|
||||
|
||||
__init int knc_pmu_init(void)
|
||||
{
|
||||
x86_pmu = knc_pmu;
|
||||
|
||||
memcpy(hw_cache_event_ids, knc_hw_cache_event_ids,
|
||||
sizeof(hw_cache_event_ids));
|
||||
|
||||
return 0;
|
||||
}
|
@@ -1,241 +0,0 @@
|
||||
#include <linux/perf_event.h>
|
||||
|
||||
enum perf_msr_id {
|
||||
PERF_MSR_TSC = 0,
|
||||
PERF_MSR_APERF = 1,
|
||||
PERF_MSR_MPERF = 2,
|
||||
PERF_MSR_PPERF = 3,
|
||||
PERF_MSR_SMI = 4,
|
||||
|
||||
PERF_MSR_EVENT_MAX,
|
||||
};
|
||||
|
||||
static bool test_aperfmperf(int idx)
|
||||
{
|
||||
return boot_cpu_has(X86_FEATURE_APERFMPERF);
|
||||
}
|
||||
|
||||
static bool test_intel(int idx)
|
||||
{
|
||||
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
|
||||
boot_cpu_data.x86 != 6)
|
||||
return false;
|
||||
|
||||
switch (boot_cpu_data.x86_model) {
|
||||
case 30: /* 45nm Nehalem */
|
||||
case 26: /* 45nm Nehalem-EP */
|
||||
case 46: /* 45nm Nehalem-EX */
|
||||
|
||||
case 37: /* 32nm Westmere */
|
||||
case 44: /* 32nm Westmere-EP */
|
||||
case 47: /* 32nm Westmere-EX */
|
||||
|
||||
case 42: /* 32nm SandyBridge */
|
||||
case 45: /* 32nm SandyBridge-E/EN/EP */
|
||||
|
||||
case 58: /* 22nm IvyBridge */
|
||||
case 62: /* 22nm IvyBridge-EP/EX */
|
||||
|
||||
case 60: /* 22nm Haswell Core */
|
||||
case 63: /* 22nm Haswell Server */
|
||||
case 69: /* 22nm Haswell ULT */
|
||||
case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
|
||||
|
||||
case 61: /* 14nm Broadwell Core-M */
|
||||
case 86: /* 14nm Broadwell Xeon D */
|
||||
case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
|
||||
case 79: /* 14nm Broadwell Server */
|
||||
|
||||
case 55: /* 22nm Atom "Silvermont" */
|
||||
case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
|
||||
case 76: /* 14nm Atom "Airmont" */
|
||||
if (idx == PERF_MSR_SMI)
|
||||
return true;
|
||||
break;
|
||||
|
||||
case 78: /* 14nm Skylake Mobile */
|
||||
case 94: /* 14nm Skylake Desktop */
|
||||
if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
|
||||
return true;
|
||||
break;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
struct perf_msr {
|
||||
u64 msr;
|
||||
struct perf_pmu_events_attr *attr;
|
||||
bool (*test)(int idx);
|
||||
};
|
||||
|
||||
PMU_EVENT_ATTR_STRING(tsc, evattr_tsc, "event=0x00");
|
||||
PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01");
|
||||
PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02");
|
||||
PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03");
|
||||
PMU_EVENT_ATTR_STRING(smi, evattr_smi, "event=0x04");
|
||||
|
||||
static struct perf_msr msr[] = {
|
||||
[PERF_MSR_TSC] = { 0, &evattr_tsc, NULL, },
|
||||
[PERF_MSR_APERF] = { MSR_IA32_APERF, &evattr_aperf, test_aperfmperf, },
|
||||
[PERF_MSR_MPERF] = { MSR_IA32_MPERF, &evattr_mperf, test_aperfmperf, },
|
||||
[PERF_MSR_PPERF] = { MSR_PPERF, &evattr_pperf, test_intel, },
|
||||
[PERF_MSR_SMI] = { MSR_SMI_COUNT, &evattr_smi, test_intel, },
|
||||
};
|
||||
|
||||
static struct attribute *events_attrs[PERF_MSR_EVENT_MAX + 1] = {
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group events_attr_group = {
|
||||
.name = "events",
|
||||
.attrs = events_attrs,
|
||||
};
|
||||
|
||||
PMU_FORMAT_ATTR(event, "config:0-63");
|
||||
static struct attribute *format_attrs[] = {
|
||||
&format_attr_event.attr,
|
||||
NULL,
|
||||
};
|
||||
static struct attribute_group format_attr_group = {
|
||||
.name = "format",
|
||||
.attrs = format_attrs,
|
||||
};
|
||||
|
||||
static const struct attribute_group *attr_groups[] = {
|
||||
&events_attr_group,
|
||||
&format_attr_group,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static int msr_event_init(struct perf_event *event)
|
||||
{
|
||||
u64 cfg = event->attr.config;
|
||||
|
||||
if (event->attr.type != event->pmu->type)
|
||||
return -ENOENT;
|
||||
|
||||
if (cfg >= PERF_MSR_EVENT_MAX)
|
||||
return -EINVAL;
|
||||
|
||||
/* unsupported modes and filters */
|
||||
if (event->attr.exclude_user ||
|
||||
event->attr.exclude_kernel ||
|
||||
event->attr.exclude_hv ||
|
||||
event->attr.exclude_idle ||
|
||||
event->attr.exclude_host ||
|
||||
event->attr.exclude_guest ||
|
||||
event->attr.sample_period) /* no sampling */
|
||||
return -EINVAL;
|
||||
|
||||
if (!msr[cfg].attr)
|
||||
return -EINVAL;
|
||||
|
||||
event->hw.idx = -1;
|
||||
event->hw.event_base = msr[cfg].msr;
|
||||
event->hw.config = cfg;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline u64 msr_read_counter(struct perf_event *event)
|
||||
{
|
||||
u64 now;
|
||||
|
||||
if (event->hw.event_base)
|
||||
rdmsrl(event->hw.event_base, now);
|
||||
else
|
||||
rdtscll(now);
|
||||
|
||||
return now;
|
||||
}
|
||||
static void msr_event_update(struct perf_event *event)
|
||||
{
|
||||
u64 prev, now;
|
||||
s64 delta;
|
||||
|
||||
/* Careful, an NMI might modify the previous event value. */
|
||||
again:
|
||||
prev = local64_read(&event->hw.prev_count);
|
||||
now = msr_read_counter(event);
|
||||
|
||||
if (local64_cmpxchg(&event->hw.prev_count, prev, now) != prev)
|
||||
goto again;
|
||||
|
||||
delta = now - prev;
|
||||
if (unlikely(event->hw.event_base == MSR_SMI_COUNT))
|
||||
delta = sign_extend64(delta, 31);
|
||||
|
||||
local64_add(now - prev, &event->count);
|
||||
}
|
||||
|
||||
static void msr_event_start(struct perf_event *event, int flags)
|
||||
{
|
||||
u64 now;
|
||||
|
||||
now = msr_read_counter(event);
|
||||
local64_set(&event->hw.prev_count, now);
|
||||
}
|
||||
|
||||
static void msr_event_stop(struct perf_event *event, int flags)
|
||||
{
|
||||
msr_event_update(event);
|
||||
}
|
||||
|
||||
static void msr_event_del(struct perf_event *event, int flags)
|
||||
{
|
||||
msr_event_stop(event, PERF_EF_UPDATE);
|
||||
}
|
||||
|
||||
static int msr_event_add(struct perf_event *event, int flags)
|
||||
{
|
||||
if (flags & PERF_EF_START)
|
||||
msr_event_start(event, flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct pmu pmu_msr = {
|
||||
.task_ctx_nr = perf_sw_context,
|
||||
.attr_groups = attr_groups,
|
||||
.event_init = msr_event_init,
|
||||
.add = msr_event_add,
|
||||
.del = msr_event_del,
|
||||
.start = msr_event_start,
|
||||
.stop = msr_event_stop,
|
||||
.read = msr_event_update,
|
||||
.capabilities = PERF_PMU_CAP_NO_INTERRUPT,
|
||||
};
|
||||
|
||||
static int __init msr_init(void)
|
||||
{
|
||||
int i, j = 0;
|
||||
|
||||
if (!boot_cpu_has(X86_FEATURE_TSC)) {
|
||||
pr_cont("no MSR PMU driver.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Probe the MSRs. */
|
||||
for (i = PERF_MSR_TSC + 1; i < PERF_MSR_EVENT_MAX; i++) {
|
||||
u64 val;
|
||||
|
||||
/*
|
||||
* Virt sucks arse; you cannot tell if a R/O MSR is present :/
|
||||
*/
|
||||
if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val))
|
||||
msr[i].attr = NULL;
|
||||
}
|
||||
|
||||
/* List remaining MSRs in the sysfs attrs. */
|
||||
for (i = 0; i < PERF_MSR_EVENT_MAX; i++) {
|
||||
if (msr[i].attr)
|
||||
events_attrs[j++] = &msr[i].attr->attr.attr;
|
||||
}
|
||||
events_attrs[j] = NULL;
|
||||
|
||||
perf_pmu_register(&pmu_msr, "msr", -1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
device_initcall(msr_init);
|
File diff suppressed because it is too large
Load Diff
@@ -1,279 +0,0 @@
#include <linux/perf_event.h>
#include <linux/types.h>

#include "perf_event.h"

/*
 * Not sure about some of these
 */
static const u64 p6_perfmon_event_map[] =
{
  [PERF_COUNT_HW_CPU_CYCLES]		= 0x0079,	/* CPU_CLK_UNHALTED */
  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,	/* INST_RETIRED */
  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0f2e,	/* L2_RQSTS:M:E:S:I */
  [PERF_COUNT_HW_CACHE_MISSES]		= 0x012e,	/* L2_RQSTS:I */
  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,	/* BR_INST_RETIRED */
  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,	/* BR_MISS_PRED_RETIRED */
  [PERF_COUNT_HW_BUS_CYCLES]		= 0x0062,	/* BUS_DRDY_CLOCKS */
  [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a2,	/* RESOURCE_STALLS */

};

static const u64 __initconst p6_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0043,	/* DATA_MEM_REFS */
		[ C(RESULT_MISS)   ] = 0x0045,	/* DCU_LINES_IN */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0x0f29,	/* L2_LD:M:E:S:I */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080,	/* IFU_IFETCH */
		[ C(RESULT_MISS)   ] = 0x0f28,	/* L2_IFETCH:M:E:S:I */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0x0025,	/* L2_M_LINES_INM */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0043,	/* DATA_MEM_REFS */
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080,	/* IFU_IFETCH */
		[ C(RESULT_MISS)   ] = 0x0085,	/* ITLB_MISS */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4,	/* BR_INST_RETIRED */
		[ C(RESULT_MISS)   ] = 0x00c5,	/* BR_MISS_PRED_RETIRED */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};

static u64 p6_pmu_event_map(int hw_event)
{
	return p6_perfmon_event_map[hw_event];
}

/*
 * Event setting that is specified not to count anything.
 * We use this to effectively disable a counter.
 *
 * L2_RQSTS with 0 MESI unit mask.
 */
#define P6_NOP_EVENT			0x0000002EULL

static struct event_constraint p6_event_constraints[] =
{
	INTEL_EVENT_CONSTRAINT(0xc1, 0x1),	/* FLOPS */
	INTEL_EVENT_CONSTRAINT(0x10, 0x1),	/* FP_COMP_OPS_EXE */
	INTEL_EVENT_CONSTRAINT(0x11, 0x2),	/* FP_ASSIST */
	INTEL_EVENT_CONSTRAINT(0x12, 0x2),	/* MUL */
	INTEL_EVENT_CONSTRAINT(0x13, 0x2),	/* DIV */
	INTEL_EVENT_CONSTRAINT(0x14, 0x1),	/* CYCLES_DIV_BUSY */
	EVENT_CONSTRAINT_END
};
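
[Editor's note] The second argument of each INTEL_EVENT_CONSTRAINT() entry above is a bitmask of the counters the event may be scheduled on: 0x1 allows only counter 0, 0x2 only counter 1. A small illustrative decoding helper (a sketch of the encoding, not part of the kernel source):

/* Sketch: bit n of the constraint mask set => counter n is allowed. */
static bool p6_counter_allowed_sketch(u64 counter_mask, int counter_idx)
{
	return counter_mask & (1ULL << counter_idx);
}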
static void p6_pmu_disable_all(void)
{
	u64 val;

	/* p6 only has one enable register */
	rdmsrl(MSR_P6_EVNTSEL0, val);
	val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
	wrmsrl(MSR_P6_EVNTSEL0, val);
}

static void p6_pmu_enable_all(int added)
{
	unsigned long val;

	/* p6 only has one enable register */
	rdmsrl(MSR_P6_EVNTSEL0, val);
	val |= ARCH_PERFMON_EVENTSEL_ENABLE;
	wrmsrl(MSR_P6_EVNTSEL0, val);
}

static inline void
p6_pmu_disable_event(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	u64 val = P6_NOP_EVENT;

	(void)wrmsrl_safe(hwc->config_base, val);
}

static void p6_pmu_enable_event(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	u64 val;

	val = hwc->config;

	/*
	 * p6 only has a global event enable, set on PerfEvtSel0
	 * We "disable" events by programming P6_NOP_EVENT
	 * and we rely on p6_pmu_enable_all() being called
	 * to actually enable the events.
	 */

	(void)wrmsrl_safe(hwc->config_base, val);
}

PMU_FORMAT_ATTR(event, "config:0-7"	);
PMU_FORMAT_ATTR(umask, "config:8-15"	);
PMU_FORMAT_ATTR(edge,  "config:18"	);
PMU_FORMAT_ATTR(pc,    "config:19"	);
PMU_FORMAT_ATTR(inv,   "config:23"	);
PMU_FORMAT_ATTR(cmask, "config:24-31"	);

static struct attribute *intel_p6_formats_attr[] = {
	&format_attr_event.attr,
	&format_attr_umask.attr,
	&format_attr_edge.attr,
	&format_attr_pc.attr,
	&format_attr_inv.attr,
	&format_attr_cmask.attr,
	NULL,
};
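
[Editor's note] The PMU_FORMAT_ATTR() lines above publish the layout of perf_event_attr::config through sysfs so tools can compose raw events. A sketch of how a config value would be assembled from those advertised fields (an illustration of the bit layout, not kernel code):

/* Sketch: pack the advertised format fields into a raw config value. */
static u64 p6_make_config_sketch(u8 event, u8 umask, u8 cmask,
				 bool edge, bool pc, bool inv)
{
	return	 (u64)event		|	/* config:0-7	*/
		((u64)umask << 8)	|	/* config:8-15	*/
		((u64)edge  << 18)	|	/* config:18	*/
		((u64)pc    << 19)	|	/* config:19	*/
		((u64)inv   << 23)	|	/* config:23	*/
		((u64)cmask << 24);		/* config:24-31	*/
}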
static __initconst const struct x86_pmu p6_pmu = {
	.name			= "p6",
	.handle_irq		= x86_pmu_handle_irq,
	.disable_all		= p6_pmu_disable_all,
	.enable_all		= p6_pmu_enable_all,
	.enable			= p6_pmu_enable_event,
	.disable		= p6_pmu_disable_event,
	.hw_config		= x86_pmu_hw_config,
	.schedule_events	= x86_schedule_events,
	.eventsel		= MSR_P6_EVNTSEL0,
	.perfctr		= MSR_P6_PERFCTR0,
	.event_map		= p6_pmu_event_map,
	.max_events		= ARRAY_SIZE(p6_perfmon_event_map),
	.apic			= 1,
	.max_period		= (1ULL << 31) - 1,
	.version		= 0,
	.num_counters		= 2,
	/*
	 * Events have 40 bits implemented. However they are designed such
	 * that bits [32-39] are sign extensions of bit 31. As such the
	 * effective width of a event for P6-like PMU is 32 bits only.
	 *
	 * See IA-32 Intel Architecture Software developer manual Vol 3B
	 */
	.cntval_bits		= 32,
	.cntval_mask		= (1ULL << 32) - 1,
	.get_event_constraints	= x86_get_event_constraints,
	.event_constraints	= p6_event_constraints,

	.format_attrs		= intel_p6_formats_attr,
	.events_sysfs_show	= intel_event_sysfs_show,

};
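
[Editor's note] With .cntval_bits = 32, only the low 32 bits of the raw counter are trusted when computing deltas; as the comment notes, bits 32-39 merely sign-extend bit 31. A sketch of that delta computation (an illustration of the idea, not the verbatim core code):

/* Sketch: deltas are computed modulo 2^cntval_bits (here 32). */
static u64 p6_counter_delta_sketch(u64 prev_raw, u64 new_raw)
{
	const int shift = 64 - 32;	/* 64 - cntval_bits */
	u64 delta;

	delta	= (new_raw << shift) - (prev_raw << shift);
	delta >>= shift;		/* truncation handles wrap-around */

	return delta;
}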
static __init void p6_pmu_rdpmc_quirk(void)
{
	if (boot_cpu_data.x86_mask < 9) {
		/*
		 * PPro erratum 26; fixed in stepping 9 and above.
		 */
		pr_warn("Userspace RDPMC support disabled due to a CPU erratum\n");
		x86_pmu.attr_rdpmc_broken = 1;
		x86_pmu.attr_rdpmc = 0;
	}
}

__init int p6_pmu_init(void)
{
	x86_pmu = p6_pmu;

	switch (boot_cpu_data.x86_model) {
	case  1: /* Pentium Pro */
		x86_add_quirk(p6_pmu_rdpmc_quirk);
		break;

	case  3: /* Pentium II - Klamath */
	case  5: /* Pentium II - Deschutes */
	case  6: /* Pentium II - Mendocino */
		break;

	case  7: /* Pentium III - Katmai */
	case  8: /* Pentium III - Coppermine */
	case 10: /* Pentium III Xeon */
	case 11: /* Pentium III - Tualatin */
		break;

	case  9: /* Pentium M - Banias */
	case 13: /* Pentium M - Dothan */
		break;

	default:
		pr_cont("unsupported p6 CPU model %d ", boot_cpu_data.x86_model);
		return -ENODEV;
	}

	memcpy(hw_cache_event_ids, p6_hw_cache_event_ids,
		sizeof(hw_cache_event_ids));

	return 0;
}
@@ -51,7 +51,7 @@ void x86_init_rdrand(struct cpuinfo_x86 *c)
 	for (i = 0; i < SANITY_CHECK_LOOPS; i++) {
 		if (!rdrand_long(&tmp)) {
 			clear_cpu_cap(c, X86_FEATURE_RDRAND);
-			printk_once(KERN_WARNING "rdrand: disabled\n");
+			pr_warn_once("rdrand: disabled\n");
 			return;
 		}
 	}
@@ -87,10 +87,10 @@ void detect_extended_topology(struct cpuinfo_x86 *c)
 	c->x86_max_cores = (core_level_siblings / smp_num_siblings);
 
 	if (!printed) {
-		printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
+		pr_info("CPU: Physical Processor ID: %d\n",
 		       c->phys_proc_id);
 		if (c->x86_max_cores > 1)
-			printk(KERN_INFO "CPU: Processor Core ID: %d\n",
+			pr_info("CPU: Processor Core ID: %d\n",
 			       c->cpu_core_id);
 		printed = 1;
 	}
@@ -33,7 +33,7 @@ static void init_transmeta(struct cpuinfo_x86 *c)
 	if (max >= 0x80860001) {
 		cpuid(0x80860001, &dummy, &cpu_rev, &cpu_freq, &cpu_flags);
 		if (cpu_rev != 0x02000000) {
-			printk(KERN_INFO "CPU: Processor revision %u.%u.%u.%u, %u MHz\n",
+			pr_info("CPU: Processor revision %u.%u.%u.%u, %u MHz\n",
 				(cpu_rev >> 24) & 0xff,
 				(cpu_rev >> 16) & 0xff,
 				(cpu_rev >> 8) & 0xff,
@@ -44,10 +44,10 @@ static void init_transmeta(struct cpuinfo_x86 *c)
 	if (max >= 0x80860002) {
 		cpuid(0x80860002, &new_cpu_rev, &cms_rev1, &cms_rev2, &dummy);
 		if (cpu_rev == 0x02000000) {
-			printk(KERN_INFO "CPU: Processor revision %08X, %u MHz\n",
+			pr_info("CPU: Processor revision %08X, %u MHz\n",
 				new_cpu_rev, cpu_freq);
 		}
-		printk(KERN_INFO "CPU: Code Morphing Software revision %u.%u.%u-%u-%u\n",
+		pr_info("CPU: Code Morphing Software revision %u.%u.%u-%u-%u\n",
 			(cms_rev1 >> 24) & 0xff,
 			(cms_rev1 >> 16) & 0xff,
 			(cms_rev1 >> 8) & 0xff,
@@ -76,7 +76,7 @@ static void init_transmeta(struct cpuinfo_x86 *c)
 			(void *)&cpu_info[56],
 			(void *)&cpu_info[60]);
 		cpu_info[64] = '\0';
-		printk(KERN_INFO "CPU: %s\n", cpu_info);
+		pr_info("CPU: %s\n", cpu_info);
 	}
 
 	/* Unhide possibly hidden capability flags */
@@ -62,7 +62,7 @@ static unsigned long vmware_get_tsc_khz(void)
 	tsc_hz = eax | (((uint64_t)ebx) << 32);
 	do_div(tsc_hz, 1000);
 	BUG_ON(tsc_hz >> 32);
-	printk(KERN_INFO "TSC freq read from hypervisor : %lu.%03lu MHz\n",
+	pr_info("TSC freq read from hypervisor : %lu.%03lu MHz\n",
 		(unsigned long) tsc_hz / 1000,
 		(unsigned long) tsc_hz % 1000);
 
@@ -84,8 +84,7 @@ static void __init vmware_platform_setup(void)
 	if (ebx != UINT_MAX)
 		x86_platform.calibrate_tsc = vmware_get_tsc_khz;
 	else
-		printk(KERN_WARNING
-		       "Failed to get TSC freq from the hypervisor\n");
+		pr_warn("Failed to get TSC freq from the hypervisor\n");
 }
 
 /*
@@ -408,7 +408,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
 	processor.cpuflag = CPU_ENABLED;
 	processor.cpufeature = (boot_cpu_data.x86 << 8) |
 	    (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
-	processor.featureflag = boot_cpu_data.x86_capability[0];
+	processor.featureflag = boot_cpu_data.x86_capability[CPUID_1_EDX];
 	processor.reserved[0] = 0;
 	processor.reserved[1] = 0;
 	for (i = 0; i < 2; i++) {
@@ -30,6 +30,7 @@
 #include <asm/nmi.h>
 #include <asm/x86_init.h>
 #include <asm/reboot.h>
+#include <asm/cache.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/nmi.h>
@@ -69,7 +70,7 @@ struct nmi_stats {
 
 static DEFINE_PER_CPU(struct nmi_stats, nmi_stats);
 
-static int ignore_nmis;
+static int ignore_nmis __read_mostly;
 
 int unknown_nmi_panic;
 /*
@@ -97,6 +97,14 @@ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
 DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
 EXPORT_PER_CPU_SYMBOL(cpu_info);
 
+/* Logical package management. We might want to allocate that dynamically */
+static int *physical_to_logical_pkg __read_mostly;
+static unsigned long *physical_package_map __read_mostly;;
+static unsigned long *logical_package_map  __read_mostly;
+static unsigned int max_physical_pkg_id __read_mostly;
+unsigned int __max_logical_packages __read_mostly;
+EXPORT_SYMBOL(__max_logical_packages);
+
 static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
 {
 	unsigned long flags;
@@ -251,6 +259,97 @@ static void notrace start_secondary(void *unused)
 	cpu_startup_entry(CPUHP_ONLINE);
 }
 
+int topology_update_package_map(unsigned int apicid, unsigned int cpu)
+{
+	unsigned int new, pkg = apicid >> boot_cpu_data.x86_coreid_bits;
+
+	/* Called from early boot ? */
+	if (!physical_package_map)
+		return 0;
+
+	if (pkg >= max_physical_pkg_id)
+		return -EINVAL;
+
+	/* Set the logical package id */
+	if (test_and_set_bit(pkg, physical_package_map))
+		goto found;
+
+	if (pkg < __max_logical_packages) {
+		set_bit(pkg, logical_package_map);
+		physical_to_logical_pkg[pkg] = pkg;
+		goto found;
+	}
+	new = find_first_zero_bit(logical_package_map, __max_logical_packages);
+	if (new >= __max_logical_packages) {
+		physical_to_logical_pkg[pkg] = -1;
+		pr_warn("APIC(%x) Package %u exceeds logical package map\n",
+			apicid, pkg);
+		return -ENOSPC;
+	}
+	set_bit(new, logical_package_map);
+	pr_info("APIC(%x) Converting physical %u to logical package %u\n",
+		apicid, pkg, new);
+	physical_to_logical_pkg[pkg] = new;
+
+found:
+	cpu_data(cpu).logical_proc_id = physical_to_logical_pkg[pkg];
+	return 0;
+}
+
+/**
+ * topology_phys_to_logical_pkg - Map a physical package id to a logical
+ *
+ * Returns logical package id or -1 if not found
+ */
+int topology_phys_to_logical_pkg(unsigned int phys_pkg)
+{
+	if (phys_pkg >= max_physical_pkg_id)
+		return -1;
+	return physical_to_logical_pkg[phys_pkg];
+}
+EXPORT_SYMBOL(topology_phys_to_logical_pkg);
+
+static void __init smp_init_package_map(void)
+{
+	unsigned int ncpus, cpu;
+	size_t size;
+
+	/*
+	 * Today neither Intel nor AMD support heterogenous systems. That
+	 * might change in the future....
+	 */
+	ncpus = boot_cpu_data.x86_max_cores * smp_num_siblings;
+	__max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus);
+
+	/*
+	 * Possibly larger than what we need as the number of apic ids per
+	 * package can be smaller than the actual used apic ids.
+	 */
+	max_physical_pkg_id = DIV_ROUND_UP(MAX_LOCAL_APIC, ncpus);
+	size = max_physical_pkg_id * sizeof(unsigned int);
+	physical_to_logical_pkg = kmalloc(size, GFP_KERNEL);
+	memset(physical_to_logical_pkg, 0xff, size);
+	size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long);
+	physical_package_map = kzalloc(size, GFP_KERNEL);
+	size = BITS_TO_LONGS(__max_logical_packages) * sizeof(unsigned long);
+	logical_package_map = kzalloc(size, GFP_KERNEL);
+
+	pr_info("Max logical packages: %u\n", __max_logical_packages);
+
+	for_each_present_cpu(cpu) {
+		unsigned int apicid = apic->cpu_present_to_apicid(cpu);
+
+		if (apicid == BAD_APICID || !apic->apic_id_valid(apicid))
+			continue;
+		if (!topology_update_package_map(apicid, cpu))
+			continue;
+		pr_warn("CPU %u APICId %x disabled\n", cpu, apicid);
+		per_cpu(x86_bios_cpu_apicid, cpu) = BAD_APICID;
+		set_cpu_possible(cpu, false);
+		set_cpu_present(cpu, false);
+	}
+}
+
 void __init smp_store_boot_cpu_info(void)
 {
 	int id = 0; /* CPU 0 */
@@ -258,6 +357,7 @@ void __init smp_store_boot_cpu_info(void)
 
 	*c = boot_cpu_data;
 	c->cpu_index = id;
+	smp_init_package_map();
 }
 
 /*