Merge tag 'powerpc-4.8-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman:
 "Highlights:
   - PowerNV PCI hotplug support.
   - Lots more Power9 support.
   - eBPF JIT support on ppc64le.
   - Lots of cxl updates.
   - Boot code consolidation.

  Bug fixes:
   - Fix spin_unlock_wait() from Boqun Feng
   - Fix stack pointer corruption in __tm_recheckpoint() from Michael Neuling
   - Fix multiple bugs in memory_hotplug_max() from Bharata B Rao
   - mm: Ensure "special" zones are empty from Oliver O'Halloran
   - ftrace: Separate the heuristics for checking call sites from Michael Ellerman
   - modules: Never restore r2 for a mprofile-kernel style mcount() call from Michael Ellerman
   - Fix endianness when reading TCEs from Alexey Kardashevskiy
   - start rtasd before PCI probing from Greg Kurz
   - PCI: rpaphp: Fix slot registration for multiple slots under a PHB from Tyrel Datwyler
   - powerpc/mm: Add memory barrier in __hugepte_alloc() from Sukadev Bhattiprolu

  Cleanups & fixes:
   - Drop support for MPIC in pseries from Rashmica Gupta
   - Define and use PPC64_ELF_ABI_v2/v1 from Michael Ellerman
   - Remove unused symbols in asm-offsets.c from Rashmica Gupta
   - Fix SRIOV not building without EEH enabled from Russell Currey
   - Remove kretprobe_trampoline_holder from Thiago Jung Bauermann
   - Reduce log level of PCI I/O space warning from Benjamin Herrenschmidt
   - Add array bounds checking to crash_shutdown_handlers from Suraj Jitindar Singh
   - Avoid -maltivec when using clang integrated assembler from Anton Blanchard
   - Fix array overrun in ppc_rtas() syscall from Andrew Donnellan
   - Fix error return value in cmm_mem_going_offline() from Rasmus Villemoes
   - export cpu_to_core_id() from Mauricio Faria de Oliveira
   - Remove old symbols from defconfigs from Andrew Donnellan
   - Update obsolete comments in setup_32.c about entry conditions from Benjamin Herrenschmidt
   - Add comment explaining the purpose of setup_kdump_trampoline() from Benjamin Herrenschmidt
   - Merge the RELOCATABLE config entries for ppc32 and ppc64 from Kevin Hao
   - Remove RELOCATABLE_PPC32 from Kevin Hao
   - Fix .long's in tlb-radix.c to more meaningful from Balbir Singh

  Minor cleanups & fixes:
   - Andrew Donnellan, Anna-Maria Gleixner, Anton Blanchard, Benjamin
     Herrenschmidt, Bharata B Rao, Christophe Leroy, Colin Ian King,
     Geliang Tang, Greg Kurz, Madhavan Srinivasan, Michael Ellerman,
     Michael Ellerman, Stephen Rothwell, Stewart Smith.

  Freescale updates from Scott:
   - "Highlights include more 8xx optimizations, device tree updates,
     and MVME7100 support."

  PowerNV PCI hotplug from Gavin Shan:
   - PCI: Add pcibios_setup_bridge()
   - Override pcibios_setup_bridge()
   - Remove PCI_RESET_DELAY_US
   - Move pnv_pci_ioda_setup_opal_tce_kill() around
   - Increase PE# capacity
   - Allocate PE# in reverse order
   - Create PEs in pcibios_setup_bridge()
   - Setup PE for root bus
   - Extend PCI bridge resources
   - Make pnv_ioda_deconfigure_pe() visible
   - Dynamically release PE
   - Update bridge windows on PCI plug
   - Delay populating pdn
   - Support PCI slot ID
   - Use PCI slot reset infrastructure
   - Introduce pnv_pci_get_slot_id()
   - Functions to get/set PCI slot state
   - PCI/hotplug: PowerPC PowerNV PCI hotplug driver
   - Print correct PHB type names

  Power9 idle support from Shreyas B. Prabhu:
   - set power_save func after the idle states are initialized
   - Use PNV_THREAD_WINKLE macro while requesting for winkle
   - make hypervisor state restore a function
   - Rename idle_power7.S to idle_book3s.S
   - Rename reusable idle functions to hardware agnostic names
   - Make pnv_powersave_common more generic
   - abstraction for saving SPRs before entering deep idle states
   - Add platform support for stop instruction
   - cpuidle/powernv: Use CPUIDLE_STATE_MAX instead of MAX_POWERNV_IDLE_STATES
   - cpuidle/powernv: cleanup cpuidle-powernv.c
   - cpuidle/powernv: Add support for POWER ISA v3 idle states
   - Use deepest stop state when cpu is offlined

  Power9 PMU from Madhavan Srinivasan:
   - factor out power8 pmu macros and defines
   - factor out power8 pmu functions
   - factor out power8 __init_pmu code
   - Add power9 event list macros for generic and cache events
   - Power9 PMU support
   - Export Power9 generic and cache events to sysfs

  Power9 preliminary interrupt & PCI support from Benjamin Herrenschmidt:
   - Add XICS emulation APIs
   - Move a few exception common handlers to make room
   - Add support for HV virtualization interrupts
   - Add mechanism to force a replay of interrupts
   - Add ICP OPAL backend
   - Discover IODA3 PHBs
   - pci: Remove obsolete SW invalidate
   - opal: Add real mode call wrappers
   - Rename TCE invalidation calls
   - Remove SWINV constants and obsolete TCE code
   - Rework accessing the TCE invalidate register
   - Fallback to OPAL for TCE invalidations
   - Use the device-tree to get available range of M64's
   - Check status of a PHB before using it
   - pci: Don't try to allocate resources that will be reassigned

  Other Power9:
   - Send SIGBUS on unaligned copy and paste from Chris Smart
   - Large Decrementer support from Oliver O'Halloran
   - Load Monitor Register Support from Jack Miller

  Performance improvements from Anton Blanchard:
   - Avoid load hit store in __giveup_fpu() and __giveup_altivec()
   - Avoid load hit store in setup_sigcontext()
   - Remove assembly versions of strcpy, strcat, strlen and strcmp
   - Align hot loops of some string functions

  eBPF JIT from Naveen N. Rao:
   - Fix/enhance 32-bit Load Immediate implementation
   - Optimize 64-bit Immediate loads
   - Introduce rotate immediate instructions
   - A few cleanups
   - Isolate classic BPF JIT specifics into a separate header
   - Implement JIT compiler for extended BPF

  Operator Panel driver from Suraj Jitindar Singh:
   - devicetree/bindings: Add binding for operator panel on FSP machines
   - Add inline function to get rc from an ASYNC_COMP opal_msg
   - Add driver for operator panel on FSP machines

  Sparse fixes from Daniel Axtens:
   - make some things static
   - Introduce asm-prototypes.h
   - Include headers containing prototypes
   - Use #ifdef __BIG_ENDIAN__ #else for REG_BYTE
   - kvm: Clarify __user annotations
   - Pass endianness to sparse
   - Make ppc_md.{halt, restart} __noreturn

  MM fixes & cleanups from Aneesh Kumar K.V:
   - radix: Update LPCR HR bit as per ISA
   - use _raw variant of page table accessors
   - Compile out radix related functions if RADIX_MMU is disabled
   - Clear top 16 bits of va only on older cpus
   - Print formation regarding the the MMU mode
   - hash: Update SDR1 size encoding as documented in ISA 3.0
   - radix: Update PID switch sequence
   - radix: Update machine call back to support new HCALL.
   - radix: Add LPID based tlb flush helpers
   - radix: Add a kernel command line to disable radix
   - Cleanup LPCR defines

  Boot code consolidation from Benjamin Herrenschmidt:
   - Move epapr_paravirt_early_init() to early_init_devtree()
   - cell: Don't use flat device-tree after boot
   - ge_imp3a: Don't use the flat device-tree after boot
   - mpc85xx_ds: Don't use the flat device-tree after boot
   - mpc85xx_rdb: Don't use the flat device-tree after boot
   - Don't test for machine type in rtas_initialize()
   - Don't test for machine type in smp_setup_cpu_maps()
   - dt: Add of_device_compatible_match()
   - Factor do_feature_fixup calls
   - Move 64-bit feature fixup earlier
   - Move 64-bit memory reserves to setup_arch()
   - Use a cachable DART
   - Move FW feature probing out of pseries probe()
   - Put exception configuration in a common place
   - Remove early allocation of the SMU command buffer
   - Move MMU backend selection out of platform code
   - pasemi: Remove IOBMAP allocation from platform probe()
   - mm/hash: Don't use machine_is() early during boot
   - Don't test for machine type to detect HEA special case
   - pmac: Remove spurrious machine type test
   - Move hash table ops to a separate structure
   - Ensure that ppc_md is empty before probing for machine type
   - Move 64-bit probe_machine() to later in the boot process
   - Move 32-bit probe() machine to later in the boot process
   - Get rid of ppc_md.init_early()
   - Move the boot time info banner to a separate function
   - Move setting of {i,d}cache_bsize to initialize_cache_info()
   - Move the content of setup_system() to setup_arch()
   - Move cache info inits to a separate function
   - Re-order the call to smp_setup_cpu_maps()
   - Re-order setup_panic()
   - Make a few boot functions __init
   - Merge 32-bit and 64-bit setup_arch()

  Other new features:
   - tty/hvc: Use IRQF_SHARED for OPAL hvc consoles from Sam Mendoza-Jonas
   - tty/hvc: Use opal irqchip interface if available from Sam Mendoza-Jonas
   - powerpc: Add module autoloading based on CPU features from Alastair D'Silva
   - crypto: vmx - Convert to CPU feature based module autoloading from Alastair D'Silva
   - Wake up kopald polling thread before waiting for events from Benjamin Herrenschmidt
   - xmon: Dump ISA 2.06 SPRs from Michael Ellerman
   - xmon: Dump ISA 2.07 SPRs from Michael Ellerman
   - Add a parameter to disable 1TB segs from Oliver O'Halloran
   - powerpc/boot: Add OPAL console to epapr wrappers from Oliver O'Halloran
   - Assign fixed PHB number based on device-tree properties from Guilherme G. Piccoli
   - pseries: Add pseries hotplug workqueue from John Allen
   - pseries: Add support for hotplug interrupt source from John Allen
   - pseries: Use kernel hotplug queue for PowerVM hotplug events from John Allen
   - pseries: Move property cloning into its own routine from Nathan Fontenot
   - pseries: Dynamic add entires to associativity lookup array from Nathan Fontenot
   - pseries: Auto-online hotplugged memory from Nathan Fontenot
   - pseries: Remove call to memblock_add() from Nathan Fontenot

  cxl:
   - Add set and get private data to context struct from Michael Neuling
   - make base more explicitly non-modular from Paul Gortmaker
   - Use for_each_compatible_node() macro from Wei Yongjun
   - Frederic Barrat:
      - Abstract the differences between the PSL and XSL
      - Make vPHB device node match adapter's
   - Philippe Bergheaud:
      - Add mechanism for delivering AFU driver specific events
      - Ignore CAPI adapters misplaced in switched slots
      - Refine slice error debug messages
   - Andrew Donnellan:
      - static-ify variables to fix sparse warnings
      - PCI/hotplug: pnv_php: export symbols and move struct types needed by cxl
      - PCI/hotplug: pnv_php: handle OPAL_PCI_SLOT_OFFLINE power state
      - Add cxl_check_and_switch_mode() API to switch bi-modal cards
      - remove dead Kconfig options
      - fix potential NULL dereference in free_adapter()
   - Ian Munsie:
      - Update process element after allocating interrupts
      - Add support for CAPP DMA mode
      - Fix allowing bogus AFU descriptors with 0 maximum processes
      - Fix allocating a minimum of 2 pages for the SPA
      - Fix bug where AFU disable operation had no effect
      - Workaround XSL bug that does not clear the RA bit after a reset
      - Fix NULL pointer dereference on kernel contexts with no AFU interrupts
      - powerpc/powernv: Split cxl code out into a separate file
      - Add cxl_slot_is_supported API
      - Enable bus mastering for devices using CAPP DMA mode
      - Move cxl_afu_get / cxl_afu_put to base
      - Allow a default context to be associated with an external pci_dev
      - Do not create vPHB if there are no AFU configuration records
      - powerpc/powernv: Add support for the cxl kernel api on the real phb
      - Add support for using the kernel API with a real PHB
      - Add kernel APIs to get & set the max irqs per context
      - Add preliminary workaround for CX4 interrupt limitation
      - Add support for interrupts on the Mellanox CX4
      - Workaround PE=0 hardware limitation in Mellanox CX4
      - powerpc/powernv: Fix pci-cxl.c build when CONFIG_MODULES=n

  selftests:
   - Test unaligned copy and paste from Chris Smart
   - Load Monitor Register Tests from Jack Miller
   - Cyril Bur:
      - exec() with suspended transaction
      - Use signed long to read perf_event_paranoid
      - Fix usage message in context_switch
      - Fix generation of vector instructions/types in context_switch
   - Michael Ellerman:
      - Use "Delta" rather than "Error" in normal output
      - Import Anton's mmap & futex micro benchmarks
      - Add a test for PROT_SAO"

* tag 'powerpc-4.8-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (263 commits)
  powerpc/mm: Parenthesise IS_ENABLED() in if condition
  tty/hvc: Use opal irqchip interface if available
  tty/hvc: Use IRQF_SHARED for OPAL hvc consoles
  selftests/powerpc: exec() with suspended transaction
  powerpc: Improve comment explaining why we modify VRSAVE
  powerpc/mm: Drop unused externs for hpte_init_beat[_v3]()
  powerpc/mm: Rename hpte_init_lpar() and move the fallback to a header
  powerpc/mm: Fix build break when PPC_NATIVE=n
  crypto: vmx - Convert to CPU feature based module autoloading
  powerpc: Add module autoloading based on CPU features
  powerpc/powernv/ioda: Fix endianness when reading TCEs
  powerpc/mm: Add memory barrier in __hugepte_alloc()
  powerpc/modules: Never restore r2 for a mprofile-kernel style mcount() call
  powerpc/ftrace: Separate the heuristics for checking call sites
  powerpc: Merge 32-bit and 64-bit setup_arch()
  powerpc/64: Make a few boot functions __init
  powerpc: Re-order setup_panic()
  powerpc: Re-order the call to smp_setup_cpu_maps()
  powerpc/32: Move cache info inits to a separate function
  powerpc/64: Move the content of setup_system() to setup_arch()
  ...
arch/powerpc/perf/Makefile
@@ -5,7 +5,7 @@ obj-$(CONFIG_PERF_EVENTS)	+= callchain.o perf_regs.o
 obj-$(CONFIG_PPC_PERF_CTRS)	+= core-book3s.o bhrb.o
 obj64-$(CONFIG_PPC_PERF_CTRS)	+= power4-pmu.o ppc970-pmu.o power5-pmu.o \
 				   power5+-pmu.o power6-pmu.o power7-pmu.o \
-				   power8-pmu.o
+				   isa207-common.o power8-pmu.o power9-pmu.o
 obj32-$(CONFIG_PPC_PERF_CTRS)	+= mpc7450-pmu.o
 
 obj-$(CONFIG_FSL_EMB_PERF_EVENT) += core-fsl-emb.o
arch/powerpc/perf/core-book3s.c
@@ -992,7 +992,7 @@ static u64 check_and_compute_delta(u64 prev, u64 val)
	 * than the previous value it will cause the delta and the counter to
	 * have bogus values unless we rolled a counter over.  If a coutner is
	 * rolled back, it will be smaller, but within 256, which is the maximum
-	 * number of events to rollback at once.  If we dectect a rollback
+	 * number of events to rollback at once.  If we detect a rollback
	 * return 0.  This can lead to a small lack of precision in the
	 * counters.
	 */
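For readers skimming the diff: the comment above belongs to check_and_compute_delta() in core-book3s.c. A minimal sketch of the check it documents, simplified from the surrounding kernel code (32-bit hardware counters assumed):

static u64 check_and_compute_delta(u64 prev, u64 val)
{
	u64 delta = (val - prev) & 0xfffffffful;	/* 32-bit counter wraps */

	/*
	 * A rolled-back counter is only slightly smaller than prev, but a
	 * naive subtraction would report it as a near-full wraparound, so
	 * treat any backwards step of less than 256 as "no new events".
	 */
	if (prev > val && (prev - val) < 256)
		delta = 0;

	return delta;
}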
arch/powerpc/perf/hv-24x7.c
@@ -1298,7 +1298,7 @@ static void h_24x7_event_read(struct perf_event *event)
		__this_cpu_write(hv_24x7_txn_err, ret);
	} else {
		/*
-		 * Assoicate the event with the HCALL request index,
+		 * Associate the event with the HCALL request index,
		 * so ->commit_txn() can quickly find/update count.
		 */
		i = request_buffer->num_requests - 1;
arch/powerpc/perf/hv-24x7.h
@@ -66,7 +66,7 @@ struct hv_24x7_result_element {
	/* -1 if @performance_domain does not refer to a virtual processor */
	__be32 lpar_cfg_instance_id;
 
-	/* size = @result_element_data_size of cointaining result. */
+	/* size = @result_element_data_size of containing result. */
	__u64 element_data[1];
} __packed;
 
arch/powerpc/perf/isa207-common.c (new file, 263 lines)
@@ -0,0 +1,263 @@
/*
 * Common Performance counter support functions for PowerISA v2.07 processors.
 *
 * Copyright 2009 Paul Mackerras, IBM Corporation.
 * Copyright 2013 Michael Ellerman, IBM Corporation.
 * Copyright 2016 Madhavan Srinivasan, IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include "isa207-common.h"

static inline bool event_is_fab_match(u64 event)
{
	/* Only check pmc, unit and pmcxsel, ignore the edge bit (0) */
	event &= 0xff0fe;

	/* PM_MRK_FAB_RSP_MATCH & PM_MRK_FAB_RSP_MATCH_CYC */
	return (event == 0x30056 || event == 0x4f052);
}

int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
{
	unsigned int unit, pmc, cache, ebb;
	unsigned long mask, value;

	mask = value = 0;

	if (event & ~EVENT_VALID_MASK)
		return -1;

	pmc   = (event >> EVENT_PMC_SHIFT)       & EVENT_PMC_MASK;
	unit  = (event >> EVENT_UNIT_SHIFT)      & EVENT_UNIT_MASK;
	cache = (event >> EVENT_CACHE_SEL_SHIFT) & EVENT_CACHE_SEL_MASK;
	ebb   = (event >> EVENT_EBB_SHIFT)       & EVENT_EBB_MASK;

	if (pmc) {
		u64 base_event;

		if (pmc > 6)
			return -1;

		/* Ignore Linux defined bits when checking event below */
		base_event = event & ~EVENT_LINUX_MASK;

		if (pmc >= 5 && base_event != 0x500fa &&
				base_event != 0x600f4)
			return -1;

		mask  |= CNST_PMC_MASK(pmc);
		value |= CNST_PMC_VAL(pmc);
	}

	if (pmc <= 4) {
		/*
		 * Add to number of counters in use. Note this includes events with
		 * a PMC of 0 - they still need a PMC, it's just assigned later.
		 * Don't count events on PMC 5 & 6, there is only one valid event
		 * on each of those counters, and they are handled above.
		 */
		mask  |= CNST_NC_MASK;
		value |= CNST_NC_VAL;
	}

	if (unit >= 6 && unit <= 9) {
		/*
		 * L2/L3 events contain a cache selector field, which is
		 * supposed to be programmed into MMCRC. However MMCRC is only
		 * HV writable, and there is no API for guest kernels to modify
		 * it. The solution is for the hypervisor to initialise the
		 * field to zeroes, and for us to only ever allow events that
		 * have a cache selector of zero. The bank selector (bit 3) is
		 * irrelevant, as long as the rest of the value is 0.
		 */
		if (cache & 0x7)
			return -1;

	} else if (event & EVENT_IS_L1) {
		mask  |= CNST_L1_QUAL_MASK;
		value |= CNST_L1_QUAL_VAL(cache);
	}

	if (event & EVENT_IS_MARKED) {
		mask  |= CNST_SAMPLE_MASK;
		value |= CNST_SAMPLE_VAL(event >> EVENT_SAMPLE_SHIFT);
	}

	/*
	 * Special case for PM_MRK_FAB_RSP_MATCH and PM_MRK_FAB_RSP_MATCH_CYC,
	 * the threshold control bits are used for the match value.
	 */
	if (event_is_fab_match(event)) {
		mask  |= CNST_FAB_MATCH_MASK;
		value |= CNST_FAB_MATCH_VAL(event >> EVENT_THR_CTL_SHIFT);
	} else {
		/*
		 * Check the mantissa upper two bits are not zero, unless the
		 * exponent is also zero. See the THRESH_CMP_MANTISSA doc.
		 */
		unsigned int cmp, exp;

		cmp = (event >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK;
		exp = cmp >> 7;

		if (exp && (cmp & 0x60) == 0)
			return -1;

		mask  |= CNST_THRESH_MASK;
		value |= CNST_THRESH_VAL(event >> EVENT_THRESH_SHIFT);
	}

	if (!pmc && ebb)
		/* EBB events must specify the PMC */
		return -1;

	if (event & EVENT_WANTS_BHRB) {
		if (!ebb)
			/* Only EBB events can request BHRB */
			return -1;

		mask  |= CNST_IFM_MASK;
		value |= CNST_IFM_VAL(event >> EVENT_IFM_SHIFT);
	}

	/*
	 * All events must agree on EBB, either all request it or none.
	 * EBB events are pinned & exclusive, so this should never actually
	 * hit, but we leave it as a fallback in case.
	 */
	mask  |= CNST_EBB_VAL(ebb);
	value |= CNST_EBB_MASK;

	*maskp = mask;
	*valp = value;

	return 0;
}

int isa207_compute_mmcr(u64 event[], int n_ev,
			       unsigned int hwc[], unsigned long mmcr[],
			       struct perf_event *pevents[])
{
	unsigned long mmcra, mmcr1, mmcr2, unit, combine, psel, cache, val;
	unsigned int pmc, pmc_inuse;
	int i;

	pmc_inuse = 0;

	/* First pass to count resource use */
	for (i = 0; i < n_ev; ++i) {
		pmc = (event[i] >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK;
		if (pmc)
			pmc_inuse |= 1 << pmc;
	}

	/* In continuous sampling mode, update SDAR on TLB miss */
	mmcra = MMCRA_SDAR_MODE_TLB;
	mmcr1 = mmcr2 = 0;

	/* Second pass: assign PMCs, set all MMCR1 fields */
	for (i = 0; i < n_ev; ++i) {
		pmc     = (event[i] >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK;
		unit    = (event[i] >> EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK;
		combine = (event[i] >> EVENT_COMBINE_SHIFT) & EVENT_COMBINE_MASK;
		psel    =  event[i] & EVENT_PSEL_MASK;

		if (!pmc) {
			for (pmc = 1; pmc <= 4; ++pmc) {
				if (!(pmc_inuse & (1 << pmc)))
					break;
			}

			pmc_inuse |= 1 << pmc;
		}

		if (pmc <= 4) {
			mmcr1 |= unit << MMCR1_UNIT_SHIFT(pmc);
			mmcr1 |= combine << MMCR1_COMBINE_SHIFT(pmc);
			mmcr1 |= psel << MMCR1_PMCSEL_SHIFT(pmc);
		}

		if (event[i] & EVENT_IS_L1) {
			cache = event[i] >> EVENT_CACHE_SEL_SHIFT;
			mmcr1 |= (cache & 1) << MMCR1_IC_QUAL_SHIFT;
			cache >>= 1;
			mmcr1 |= (cache & 1) << MMCR1_DC_QUAL_SHIFT;
		}

		if (event[i] & EVENT_IS_MARKED) {
			mmcra |= MMCRA_SAMPLE_ENABLE;

			val = (event[i] >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK;
			if (val) {
				mmcra |= (val &  3) << MMCRA_SAMP_MODE_SHIFT;
				mmcra |= (val >> 2) << MMCRA_SAMP_ELIG_SHIFT;
			}
		}

		/*
		 * PM_MRK_FAB_RSP_MATCH and PM_MRK_FAB_RSP_MATCH_CYC,
		 * the threshold bits are used for the match value.
		 */
		if (event_is_fab_match(event[i])) {
			mmcr1 |= ((event[i] >> EVENT_THR_CTL_SHIFT) &
				  EVENT_THR_CTL_MASK) << MMCR1_FAB_SHIFT;
		} else {
			val = (event[i] >> EVENT_THR_CTL_SHIFT) & EVENT_THR_CTL_MASK;
			mmcra |= val << MMCRA_THR_CTL_SHIFT;
			val = (event[i] >> EVENT_THR_SEL_SHIFT) & EVENT_THR_SEL_MASK;
			mmcra |= val << MMCRA_THR_SEL_SHIFT;
			val = (event[i] >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK;
			mmcra |= val << MMCRA_THR_CMP_SHIFT;
		}

		if (event[i] & EVENT_WANTS_BHRB) {
			val = (event[i] >> EVENT_IFM_SHIFT) & EVENT_IFM_MASK;
			mmcra |= val << MMCRA_IFM_SHIFT;
		}

		if (pevents[i]->attr.exclude_user)
			mmcr2 |= MMCR2_FCP(pmc);

		if (pevents[i]->attr.exclude_hv)
			mmcr2 |= MMCR2_FCH(pmc);

		if (pevents[i]->attr.exclude_kernel) {
			if (cpu_has_feature(CPU_FTR_HVMODE))
				mmcr2 |= MMCR2_FCH(pmc);
			else
				mmcr2 |= MMCR2_FCS(pmc);
		}

		hwc[i] = pmc - 1;
	}

	/* Return MMCRx values */
	mmcr[0] = 0;

	/* pmc_inuse is 1-based */
	if (pmc_inuse & 2)
		mmcr[0] = MMCR0_PMC1CE;

	if (pmc_inuse & 0x7c)
		mmcr[0] |= MMCR0_PMCjCE;

	/* If we're not using PMC 5 or 6, freeze them */
	if (!(pmc_inuse & 0x60))
		mmcr[0] |= MMCR0_FC56;

	mmcr[1] = mmcr1;
	mmcr[2] = mmcra;
	mmcr[3] = mmcr2;

	return 0;
}

void isa207_disable_pmc(unsigned int pmc, unsigned long mmcr[])
{
	if (pmc <= 3)
		mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SHIFT(pmc + 1));
}
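A note on how these results are consumed: isa207_get_constraint() hands back one (mask, value) pair per event, and the generic powerpc constraint code layers the pairs on top of each other. A hypothetical helper (not part of this commit) capturing the intuition for the value fields; the adder fields (P1..P6, NC) are instead summed, as the header comments below describe:

static bool cnst_values_clash(unsigned long mask_a, unsigned long val_a,
			      unsigned long mask_b, unsigned long val_b)
{
	/*
	 * Two events conflict if their constraint values disagree in any
	 * bit position that both of their masks cover.
	 */
	return ((val_a ^ val_b) & (mask_a & mask_b)) != 0;
}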
arch/powerpc/perf/isa207-common.h (new file, 236 lines)
@@ -0,0 +1,236 @@
/*
 * Copyright 2009 Paul Mackerras, IBM Corporation.
 * Copyright 2013 Michael Ellerman, IBM Corporation.
 * Copyright 2016 Madhavan Srinivasan, IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or any later version.
 */

#ifndef _LINUX_POWERPC_PERF_ISA207_COMMON_H_
#define _LINUX_POWERPC_PERF_ISA207_COMMON_H_

#include <linux/kernel.h>
#include <linux/perf_event.h>
#include <asm/firmware.h>
#include <asm/cputable.h>

/*
 * Raw event encoding for PowerISA v2.07:
 *
 *        60        56        52        48        44        40        36        32
 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
 *   | | [ ]                           [      thresh_cmp     ]   [  thresh_ctl   ]
 *   | |  |                                                              |
 *   | |  *- IFM (Linux)                 thresh start/stop OR FAB match -*
 *   | *- BHRB (Linux)
 *   *- EBB (Linux)
 *
 *        28        24        20        16        12         8         4         0
 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
 *   [   ] [  sample ]   [cache]   [ pmc ]   [unit ]   c     m   [    pmcxsel    ]
 *     |        |           |                          |     |
 *     |        |           |                          |     *- mark
 *     |        |           *- L1/L2/L3 cache_sel      |
 *     |        |                                      |
 *     |        *- sampling mode for marked events     *- combine
 *     |
 *     *- thresh_sel
 *
 * Below uses IBM bit numbering.
 *
 * MMCR1[x:y]    = unit    (PMCxUNIT)
 * MMCR1[x]      = combine (PMCxCOMB)
 *
 * if pmc == 3 and unit == 0 and pmcxsel[0:6] == 0b0101011
 *	# PM_MRK_FAB_RSP_MATCH
 *	MMCR1[20:27] = thresh_ctl   (FAB_CRESP_MATCH / FAB_TYPE_MATCH)
 * else if pmc == 4 and unit == 0xf and pmcxsel[0:6] == 0b0101001
 *	# PM_MRK_FAB_RSP_MATCH_CYC
 *	MMCR1[20:27] = thresh_ctl   (FAB_CRESP_MATCH / FAB_TYPE_MATCH)
 * else
 *	MMCRA[48:55] = thresh_ctl   (THRESH START/END)
 *
 * if thresh_sel:
 *	MMCRA[45:47] = thresh_sel
 *
 * if thresh_cmp:
 *	MMCRA[22:24] = thresh_cmp[0:2]
 *	MMCRA[25:31] = thresh_cmp[3:9]
 *
 * if unit == 6 or unit == 7
 *	MMCRC[53:55] = cache_sel[1:3]	(L2EVENT_SEL)
 * else if unit == 8 or unit == 9:
 *	if cache_sel[0] == 0: # L3 bank
 *		MMCRC[47:49] = cache_sel[1:3]	(L3EVENT_SEL0)
 *	else if cache_sel[0] == 1:
 *		MMCRC[50:51] = cache_sel[2:3]	(L3EVENT_SEL1)
 * else if cache_sel[1]: # L1 event
 *	MMCR1[16] = cache_sel[2]
 *	MMCR1[17] = cache_sel[3]
 *
 * if mark:
 *	MMCRA[63]    = 1		(SAMPLE_ENABLE)
 *	MMCRA[57:59] = sample[0:2]	(RAND_SAMP_ELIG)
 *	MMCRA[61:62] = sample[3:4]	(RAND_SAMP_MODE)
 *
 * if EBB and BHRB:
 *	MMCRA[32:33] = IFM
 *
 */

#define EVENT_EBB_MASK		1ull
#define EVENT_EBB_SHIFT		PERF_EVENT_CONFIG_EBB_SHIFT
#define EVENT_BHRB_MASK		1ull
#define EVENT_BHRB_SHIFT	62
#define EVENT_WANTS_BHRB	(EVENT_BHRB_MASK << EVENT_BHRB_SHIFT)
#define EVENT_IFM_MASK		3ull
#define EVENT_IFM_SHIFT		60
#define EVENT_THR_CMP_SHIFT	40	/* Threshold CMP value */
#define EVENT_THR_CMP_MASK	0x3ff
#define EVENT_THR_CTL_SHIFT	32	/* Threshold control value (start/stop) */
#define EVENT_THR_CTL_MASK	0xffull
#define EVENT_THR_SEL_SHIFT	29	/* Threshold select value */
#define EVENT_THR_SEL_MASK	0x7
#define EVENT_THRESH_SHIFT	29	/* All threshold bits */
#define EVENT_THRESH_MASK	0x1fffffull
#define EVENT_SAMPLE_SHIFT	24	/* Sampling mode & eligibility */
#define EVENT_SAMPLE_MASK	0x1f
#define EVENT_CACHE_SEL_SHIFT	20	/* L2/L3 cache select */
#define EVENT_CACHE_SEL_MASK	0xf
#define EVENT_IS_L1		(4 << EVENT_CACHE_SEL_SHIFT)
#define EVENT_PMC_SHIFT		16	/* PMC number (1-based) */
#define EVENT_PMC_MASK		0xf
#define EVENT_UNIT_SHIFT	12	/* Unit */
#define EVENT_UNIT_MASK		0xf
#define EVENT_COMBINE_SHIFT	11	/* Combine bit */
#define EVENT_COMBINE_MASK	0x1
#define EVENT_MARKED_SHIFT	8	/* Marked bit */
#define EVENT_MARKED_MASK	0x1
#define EVENT_IS_MARKED		(EVENT_MARKED_MASK << EVENT_MARKED_SHIFT)
#define EVENT_PSEL_MASK		0xff	/* PMCxSEL value */

/* Bits defined by Linux */
#define EVENT_LINUX_MASK	\
	((EVENT_EBB_MASK  << EVENT_EBB_SHIFT)			|	\
	 (EVENT_BHRB_MASK << EVENT_BHRB_SHIFT)			|	\
	 (EVENT_IFM_MASK  << EVENT_IFM_SHIFT))

#define EVENT_VALID_MASK	\
	((EVENT_THRESH_MASK    << EVENT_THRESH_SHIFT)		|	\
	 (EVENT_SAMPLE_MASK    << EVENT_SAMPLE_SHIFT)		|	\
	 (EVENT_CACHE_SEL_MASK << EVENT_CACHE_SEL_SHIFT)	|	\
	 (EVENT_PMC_MASK       << EVENT_PMC_SHIFT)		|	\
	 (EVENT_UNIT_MASK      << EVENT_UNIT_SHIFT)		|	\
	 (EVENT_COMBINE_MASK   << EVENT_COMBINE_SHIFT)		|	\
	 (EVENT_MARKED_MASK    << EVENT_MARKED_SHIFT)		|	\
	  EVENT_LINUX_MASK					|	\
	  EVENT_PSEL_MASK)

#define ONLY_PLM \
	(PERF_SAMPLE_BRANCH_USER        |\
	 PERF_SAMPLE_BRANCH_KERNEL      |\
	 PERF_SAMPLE_BRANCH_HV)

/*
 * Layout of constraint bits:
 *
 *        60        56        52        48        44        40        36        32
 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
 *   [   fab_match   ]         [       thresh_cmp      ] [   thresh_ctl    ] [   ]
 *                                                                             |
 *                                                                 thresh_sel -*
 *
 *        28        24        20        16        12         8         4         0
 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
 *               [ ] |   [ ]   [  sample ]   [     ]   [6] [5]   [4] [3]   [2] [1]
 *                |  |    |                     |
 *      BHRB IFM -*  |    |                     |      Count of events for each PMC.
 *              EBB -*    |                     |        p1, p2, p3, p4, p5, p6.
 *     L1 I/D qualifier -*                      |
 *                      nc - number of counters-*
 *
 * The PMC fields P1..P6, and NC, are adder fields. As we accumulate constraints
 * we want the low bit of each field to be added to any existing value.
 *
 * Everything else is a value field.
 */

#define CNST_FAB_MATCH_VAL(v)	(((v) & EVENT_THR_CTL_MASK) << 56)
#define CNST_FAB_MATCH_MASK	CNST_FAB_MATCH_VAL(EVENT_THR_CTL_MASK)

/* We just throw all the threshold bits into the constraint */
#define CNST_THRESH_VAL(v)	(((v) & EVENT_THRESH_MASK) << 32)
#define CNST_THRESH_MASK	CNST_THRESH_VAL(EVENT_THRESH_MASK)

#define CNST_EBB_VAL(v)		(((v) & EVENT_EBB_MASK) << 24)
#define CNST_EBB_MASK		CNST_EBB_VAL(EVENT_EBB_MASK)

#define CNST_IFM_VAL(v)		(((v) & EVENT_IFM_MASK) << 25)
#define CNST_IFM_MASK		CNST_IFM_VAL(EVENT_IFM_MASK)

#define CNST_L1_QUAL_VAL(v)	(((v) & 3) << 22)
#define CNST_L1_QUAL_MASK	CNST_L1_QUAL_VAL(3)

#define CNST_SAMPLE_VAL(v)	(((v) & EVENT_SAMPLE_MASK) << 16)
#define CNST_SAMPLE_MASK	CNST_SAMPLE_VAL(EVENT_SAMPLE_MASK)

/*
 * For NC we are counting up to 4 events. This requires three bits, and we need
 * the fifth event to overflow and set the 4th bit. To achieve that we bias the
 * fields by 3 in test_adder.
 */
#define CNST_NC_SHIFT		12
#define CNST_NC_VAL		(1 << CNST_NC_SHIFT)
#define CNST_NC_MASK		(8 << CNST_NC_SHIFT)
#define ISA207_TEST_ADDER	(3 << CNST_NC_SHIFT)

/*
 * For the per-PMC fields we have two bits. The low bit is added, so if two
 * events ask for the same PMC the sum will overflow, setting the high bit,
 * indicating an error. So our mask sets the high bit.
 */
#define CNST_PMC_SHIFT(pmc)	((pmc - 1) * 2)
#define CNST_PMC_VAL(pmc)	(1 << CNST_PMC_SHIFT(pmc))
#define CNST_PMC_MASK(pmc)	(2 << CNST_PMC_SHIFT(pmc))

/* Our add_fields is defined as: */
#define ISA207_ADD_FIELDS	\
	CNST_PMC_VAL(1) | CNST_PMC_VAL(2) | CNST_PMC_VAL(3) | \
	CNST_PMC_VAL(4) | CNST_PMC_VAL(5) | CNST_PMC_VAL(6) | CNST_NC_VAL


/* Bits in MMCR1 for PowerISA v2.07 */
#define MMCR1_UNIT_SHIFT(pmc)		(60 - (4 * ((pmc) - 1)))
#define MMCR1_COMBINE_SHIFT(pmc)	(35 - ((pmc) - 1))
#define MMCR1_PMCSEL_SHIFT(pmc)		(24 - (((pmc) - 1)) * 8)
#define MMCR1_FAB_SHIFT			36
#define MMCR1_DC_QUAL_SHIFT		47
#define MMCR1_IC_QUAL_SHIFT		46

/* Bits in MMCRA for PowerISA v2.07 */
#define MMCRA_SAMP_MODE_SHIFT		1
#define MMCRA_SAMP_ELIG_SHIFT		4
#define MMCRA_THR_CTL_SHIFT		8
#define MMCRA_THR_SEL_SHIFT		16
#define MMCRA_THR_CMP_SHIFT		32
#define MMCRA_SDAR_MODE_TLB		(1ull << 42)
#define MMCRA_IFM_SHIFT			30

/* Bits in MMCR2 for PowerISA v2.07 */
#define MMCR2_FCS(pmc)			(1ull << (63 - (((pmc) - 1) * 9)))
#define MMCR2_FCP(pmc)			(1ull << (62 - (((pmc) - 1) * 9)))
#define MMCR2_FCH(pmc)			(1ull << (57 - (((pmc) - 1) * 9)))

#define MAX_ALT				2
#define MAX_PMU_COUNTERS		6

int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp);
int isa207_compute_mmcr(u64 event[], int n_ev,
				unsigned int hwc[], unsigned long mmcr[],
				struct perf_event *pevents[]);
void isa207_disable_pmc(unsigned int pmc, unsigned long mmcr[]);

#endif
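The NC and per-PMC adder-field comments above are easiest to check with concrete numbers. A standalone toy (hypothetical, not part of the commit) that assumes, as in the kernel's constraint solver, that ISA207_TEST_ADDER is added to the accumulated sum before the mask bits are tested:

#include <assert.h>

#define CNST_NC_SHIFT		12
#define CNST_NC_VAL		(1 << CNST_NC_SHIFT)
#define CNST_NC_MASK		(8 << CNST_NC_SHIFT)
#define ISA207_TEST_ADDER	(3 << CNST_NC_SHIFT)

#define CNST_PMC_SHIFT(pmc)	((pmc - 1) * 2)
#define CNST_PMC_VAL(pmc)	(1 << CNST_PMC_SHIFT(pmc))
#define CNST_PMC_MASK(pmc)	(2 << CNST_PMC_SHIFT(pmc))

int main(void)
{
	/* Four events needing PMC1-4: biased NC sum is 7, mask bit clear. */
	assert(((4 * CNST_NC_VAL + ISA207_TEST_ADDER) & CNST_NC_MASK) == 0);

	/* A fifth event overflows the biased sum into the mask bit. */
	assert(((5 * CNST_NC_VAL + ISA207_TEST_ADDER) & CNST_NC_MASK) != 0);

	/*
	 * Two events pinned to the same PMC: 1 + 1 carries into the high
	 * bit of the 2-bit field, which is exactly CNST_PMC_MASK.
	 */
	assert(((CNST_PMC_VAL(3) + CNST_PMC_VAL(3)) & CNST_PMC_MASK(3)) != 0);

	return 0;
}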
arch/powerpc/perf/power8-pmu.c
@@ -12,10 +12,7 @@
 
 #define pr_fmt(fmt)	"power8-pmu: " fmt
 
-#include <linux/kernel.h>
-#include <linux/perf_event.h>
-#include <asm/firmware.h>
-#include <asm/cputable.h>
+#include "isa207-common.h"
 
 /*
  * Some power8 event codes.
@@ -28,465 +25,11 @@ enum {
 
 #undef EVENT
 
-/*
- * Raw event encoding for POWER8:
- *
- *        60        56        52        48        44        40        36        32
- * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
- *   | | [ ]                           [      thresh_cmp     ]   [  thresh_ctl   ]
- *   | |  |                                                              |
- *   | |  *- IFM (Linux)                 thresh start/stop OR FAB match -*
- *   | *- BHRB (Linux)
- *   *- EBB (Linux)
- *
- *        28        24        20        16        12         8         4         0
- * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
- *   [   ] [  sample ]   [cache]   [ pmc ]   [unit ]   c     m   [    pmcxsel    ]
- *     |        |           |                          |     |
- *     |        |           |                          |     *- mark
- *     |        |           *- L1/L2/L3 cache_sel      |
- *     |        |                                      |
- *     |        *- sampling mode for marked events     *- combine
- *     |
- *     *- thresh_sel
- *
- * Below uses IBM bit numbering.
- *
- * MMCR1[x:y]    = unit    (PMCxUNIT)
- * MMCR1[x]      = combine (PMCxCOMB)
- *
- * if pmc == 3 and unit == 0 and pmcxsel[0:6] == 0b0101011
- *	# PM_MRK_FAB_RSP_MATCH
- *	MMCR1[20:27] = thresh_ctl   (FAB_CRESP_MATCH / FAB_TYPE_MATCH)
- * else if pmc == 4 and unit == 0xf and pmcxsel[0:6] == 0b0101001
- *	# PM_MRK_FAB_RSP_MATCH_CYC
- *	MMCR1[20:27] = thresh_ctl   (FAB_CRESP_MATCH / FAB_TYPE_MATCH)
- * else
- *	MMCRA[48:55] = thresh_ctl   (THRESH START/END)
- *
- * if thresh_sel:
- *	MMCRA[45:47] = thresh_sel
- *
- * if thresh_cmp:
- *	MMCRA[22:24] = thresh_cmp[0:2]
- *	MMCRA[25:31] = thresh_cmp[3:9]
- *
- * if unit == 6 or unit == 7
- *	MMCRC[53:55] = cache_sel[1:3]	(L2EVENT_SEL)
- * else if unit == 8 or unit == 9:
- *	if cache_sel[0] == 0: # L3 bank
- *		MMCRC[47:49] = cache_sel[1:3]	(L3EVENT_SEL0)
- *	else if cache_sel[0] == 1:
- *		MMCRC[50:51] = cache_sel[2:3]	(L3EVENT_SEL1)
- * else if cache_sel[1]: # L1 event
- *	MMCR1[16] = cache_sel[2]
- *	MMCR1[17] = cache_sel[3]
- *
- * if mark:
- *	MMCRA[63]    = 1		(SAMPLE_ENABLE)
- *	MMCRA[57:59] = sample[0:2]	(RAND_SAMP_ELIG)
- *	MMCRA[61:62] = sample[3:4]	(RAND_SAMP_MODE)
- *
- * if EBB and BHRB:
- *	MMCRA[32:33] = IFM
- *
- */
-
-#define EVENT_EBB_MASK		1ull
-#define EVENT_EBB_SHIFT		PERF_EVENT_CONFIG_EBB_SHIFT
-#define EVENT_BHRB_MASK		1ull
-#define EVENT_BHRB_SHIFT	62
-#define EVENT_WANTS_BHRB	(EVENT_BHRB_MASK << EVENT_BHRB_SHIFT)
-#define EVENT_IFM_MASK		3ull
-#define EVENT_IFM_SHIFT		60
-#define EVENT_THR_CMP_SHIFT	40	/* Threshold CMP value */
-#define EVENT_THR_CMP_MASK	0x3ff
-#define EVENT_THR_CTL_SHIFT	32	/* Threshold control value (start/stop) */
-#define EVENT_THR_CTL_MASK	0xffull
-#define EVENT_THR_SEL_SHIFT	29	/* Threshold select value */
-#define EVENT_THR_SEL_MASK	0x7
-#define EVENT_THRESH_SHIFT	29	/* All threshold bits */
-#define EVENT_THRESH_MASK	0x1fffffull
-#define EVENT_SAMPLE_SHIFT	24	/* Sampling mode & eligibility */
-#define EVENT_SAMPLE_MASK	0x1f
-#define EVENT_CACHE_SEL_SHIFT	20	/* L2/L3 cache select */
-#define EVENT_CACHE_SEL_MASK	0xf
-#define EVENT_IS_L1		(4 << EVENT_CACHE_SEL_SHIFT)
-#define EVENT_PMC_SHIFT		16	/* PMC number (1-based) */
-#define EVENT_PMC_MASK		0xf
-#define EVENT_UNIT_SHIFT	12	/* Unit */
-#define EVENT_UNIT_MASK		0xf
-#define EVENT_COMBINE_SHIFT	11	/* Combine bit */
-#define EVENT_COMBINE_MASK	0x1
-#define EVENT_MARKED_SHIFT	8	/* Marked bit */
-#define EVENT_MARKED_MASK	0x1
-#define EVENT_IS_MARKED		(EVENT_MARKED_MASK << EVENT_MARKED_SHIFT)
-#define EVENT_PSEL_MASK		0xff	/* PMCxSEL value */
-
-/* Bits defined by Linux */
-#define EVENT_LINUX_MASK	\
-	((EVENT_EBB_MASK  << EVENT_EBB_SHIFT)			|	\
-	 (EVENT_BHRB_MASK << EVENT_BHRB_SHIFT)			|	\
-	 (EVENT_IFM_MASK  << EVENT_IFM_SHIFT))
-
-#define EVENT_VALID_MASK	\
-	((EVENT_THRESH_MASK    << EVENT_THRESH_SHIFT)		|	\
-	 (EVENT_SAMPLE_MASK    << EVENT_SAMPLE_SHIFT)		|	\
-	 (EVENT_CACHE_SEL_MASK << EVENT_CACHE_SEL_SHIFT)	|	\
-	 (EVENT_PMC_MASK       << EVENT_PMC_SHIFT)		|	\
-	 (EVENT_UNIT_MASK      << EVENT_UNIT_SHIFT)		|	\
-	 (EVENT_COMBINE_MASK   << EVENT_COMBINE_SHIFT)		|	\
-	 (EVENT_MARKED_MASK    << EVENT_MARKED_SHIFT)		|	\
-	  EVENT_LINUX_MASK					|	\
-	  EVENT_PSEL_MASK)
-
 /* MMCRA IFM bits - POWER8 */
 #define POWER8_MMCRA_IFM1		0x0000000040000000UL
 #define POWER8_MMCRA_IFM2		0x0000000080000000UL
 #define POWER8_MMCRA_IFM3		0x00000000C0000000UL
 
-#define ONLY_PLM \
-	(PERF_SAMPLE_BRANCH_USER        |\
-	 PERF_SAMPLE_BRANCH_KERNEL      |\
-	 PERF_SAMPLE_BRANCH_HV)
-
-/*
- * Layout of constraint bits:
- *
- *        60        56        52        48        44        40        36        32
- * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
- *   [   fab_match   ]         [       thresh_cmp      ] [   thresh_ctl    ] [   ]
- *                                                                             |
- *                                                                 thresh_sel -*
- *
- *        28        24        20        16        12         8         4         0
- * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
- *               [ ] |   [ ]   [  sample ]   [     ]   [6] [5]   [4] [3]   [2] [1]
- *                |  |    |                     |
- *      BHRB IFM -*  |    |                     |      Count of events for each PMC.
- *              EBB -*    |                     |        p1, p2, p3, p4, p5, p6.
- *     L1 I/D qualifier -*                      |
- *                      nc - number of counters-*
- *
- * The PMC fields P1..P6, and NC, are adder fields. As we accumulate constraints
- * we want the low bit of each field to be added to any existing value.
- *
- * Everything else is a value field.
- */
-
-#define CNST_FAB_MATCH_VAL(v)	(((v) & EVENT_THR_CTL_MASK) << 56)
-#define CNST_FAB_MATCH_MASK	CNST_FAB_MATCH_VAL(EVENT_THR_CTL_MASK)
-
-/* We just throw all the threshold bits into the constraint */
-#define CNST_THRESH_VAL(v)	(((v) & EVENT_THRESH_MASK) << 32)
-#define CNST_THRESH_MASK	CNST_THRESH_VAL(EVENT_THRESH_MASK)
-
-#define CNST_EBB_VAL(v)		(((v) & EVENT_EBB_MASK) << 24)
-#define CNST_EBB_MASK		CNST_EBB_VAL(EVENT_EBB_MASK)
-
-#define CNST_IFM_VAL(v)		(((v) & EVENT_IFM_MASK) << 25)
-#define CNST_IFM_MASK		CNST_IFM_VAL(EVENT_IFM_MASK)
-
-#define CNST_L1_QUAL_VAL(v)	(((v) & 3) << 22)
-#define CNST_L1_QUAL_MASK	CNST_L1_QUAL_VAL(3)
-
-#define CNST_SAMPLE_VAL(v)	(((v) & EVENT_SAMPLE_MASK) << 16)
-#define CNST_SAMPLE_MASK	CNST_SAMPLE_VAL(EVENT_SAMPLE_MASK)
-
-/*
- * For NC we are counting up to 4 events. This requires three bits, and we need
- * the fifth event to overflow and set the 4th bit. To achieve that we bias the
- * fields by 3 in test_adder.
- */
-#define CNST_NC_SHIFT		12
-#define CNST_NC_VAL		(1 << CNST_NC_SHIFT)
-#define CNST_NC_MASK		(8 << CNST_NC_SHIFT)
-#define POWER8_TEST_ADDER	(3 << CNST_NC_SHIFT)
-
-/*
- * For the per-PMC fields we have two bits. The low bit is added, so if two
- * events ask for the same PMC the sum will overflow, setting the high bit,
- * indicating an error. So our mask sets the high bit.
- */
-#define CNST_PMC_SHIFT(pmc)	((pmc - 1) * 2)
-#define CNST_PMC_VAL(pmc)	(1 << CNST_PMC_SHIFT(pmc))
-#define CNST_PMC_MASK(pmc)	(2 << CNST_PMC_SHIFT(pmc))
-
-/* Our add_fields is defined as: */
-#define POWER8_ADD_FIELDS	\
-	CNST_PMC_VAL(1) | CNST_PMC_VAL(2) | CNST_PMC_VAL(3) | \
-	CNST_PMC_VAL(4) | CNST_PMC_VAL(5) | CNST_PMC_VAL(6) | CNST_NC_VAL
-
-
-/* Bits in MMCR1 for POWER8 */
-#define MMCR1_UNIT_SHIFT(pmc)		(60 - (4 * ((pmc) - 1)))
-#define MMCR1_COMBINE_SHIFT(pmc)	(35 - ((pmc) - 1))
-#define MMCR1_PMCSEL_SHIFT(pmc)		(24 - (((pmc) - 1)) * 8)
-#define MMCR1_FAB_SHIFT			36
-#define MMCR1_DC_QUAL_SHIFT		47
-#define MMCR1_IC_QUAL_SHIFT		46
-
-/* Bits in MMCRA for POWER8 */
-#define MMCRA_SAMP_MODE_SHIFT		1
-#define MMCRA_SAMP_ELIG_SHIFT		4
-#define MMCRA_THR_CTL_SHIFT		8
-#define MMCRA_THR_SEL_SHIFT		16
-#define MMCRA_THR_CMP_SHIFT		32
-#define MMCRA_SDAR_MODE_TLB		(1ull << 42)
-#define MMCRA_IFM_SHIFT			30
-
-/* Bits in MMCR2 for POWER8 */
-#define MMCR2_FCS(pmc)			(1ull << (63 - (((pmc) - 1) * 9)))
-#define MMCR2_FCP(pmc)			(1ull << (62 - (((pmc) - 1) * 9)))
-#define MMCR2_FCH(pmc)			(1ull << (57 - (((pmc) - 1) * 9)))
-
-
-static inline bool event_is_fab_match(u64 event)
-{
-	/* Only check pmc, unit and pmcxsel, ignore the edge bit (0) */
-	event &= 0xff0fe;
-
-	/* PM_MRK_FAB_RSP_MATCH & PM_MRK_FAB_RSP_MATCH_CYC */
-	return (event == 0x30056 || event == 0x4f052);
-}
-
-static int power8_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
-{
-	unsigned int unit, pmc, cache, ebb;
-	unsigned long mask, value;
-
-	mask = value = 0;
-
-	if (event & ~EVENT_VALID_MASK)
-		return -1;
-
-	pmc   = (event >> EVENT_PMC_SHIFT)       & EVENT_PMC_MASK;
-	unit  = (event >> EVENT_UNIT_SHIFT)      & EVENT_UNIT_MASK;
-	cache = (event >> EVENT_CACHE_SEL_SHIFT) & EVENT_CACHE_SEL_MASK;
-	ebb   = (event >> EVENT_EBB_SHIFT)       & EVENT_EBB_MASK;
-
-	if (pmc) {
-		u64 base_event;
-
-		if (pmc > 6)
-			return -1;
-
-		/* Ignore Linux defined bits when checking event below */
-		base_event = event & ~EVENT_LINUX_MASK;
-
-		if (pmc >= 5 && base_event != PM_RUN_INST_CMPL &&
-				base_event != PM_RUN_CYC)
-			return -1;
-
-		mask  |= CNST_PMC_MASK(pmc);
-		value |= CNST_PMC_VAL(pmc);
-	}
-
-	if (pmc <= 4) {
-		/*
-		 * Add to number of counters in use. Note this includes events with
-		 * a PMC of 0 - they still need a PMC, it's just assigned later.
-		 * Don't count events on PMC 5 & 6, there is only one valid event
-		 * on each of those counters, and they are handled above.
-		 */
-		mask  |= CNST_NC_MASK;
-		value |= CNST_NC_VAL;
-	}
-
-	if (unit >= 6 && unit <= 9) {
-		/*
-		 * L2/L3 events contain a cache selector field, which is
-		 * supposed to be programmed into MMCRC. However MMCRC is only
-		 * HV writable, and there is no API for guest kernels to modify
-		 * it. The solution is for the hypervisor to initialise the
-		 * field to zeroes, and for us to only ever allow events that
-		 * have a cache selector of zero. The bank selector (bit 3) is
-		 * irrelevant, as long as the rest of the value is 0.
-		 */
-		if (cache & 0x7)
-			return -1;
-
-	} else if (event & EVENT_IS_L1) {
-		mask  |= CNST_L1_QUAL_MASK;
-		value |= CNST_L1_QUAL_VAL(cache);
-	}
-
-	if (event & EVENT_IS_MARKED) {
-		mask  |= CNST_SAMPLE_MASK;
-		value |= CNST_SAMPLE_VAL(event >> EVENT_SAMPLE_SHIFT);
-	}
-
-	/*
-	 * Special case for PM_MRK_FAB_RSP_MATCH and PM_MRK_FAB_RSP_MATCH_CYC,
-	 * the threshold control bits are used for the match value.
-	 */
-	if (event_is_fab_match(event)) {
-		mask  |= CNST_FAB_MATCH_MASK;
-		value |= CNST_FAB_MATCH_VAL(event >> EVENT_THR_CTL_SHIFT);
-	} else {
-		/*
-		 * Check the mantissa upper two bits are not zero, unless the
-		 * exponent is also zero. See the THRESH_CMP_MANTISSA doc.
-		 */
-		unsigned int cmp, exp;
-
-		cmp = (event >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK;
-		exp = cmp >> 7;
-
-		if (exp && (cmp & 0x60) == 0)
-			return -1;
-
-		mask  |= CNST_THRESH_MASK;
-		value |= CNST_THRESH_VAL(event >> EVENT_THRESH_SHIFT);
-	}
-
-	if (!pmc && ebb)
-		/* EBB events must specify the PMC */
-		return -1;
-
-	if (event & EVENT_WANTS_BHRB) {
-		if (!ebb)
-			/* Only EBB events can request BHRB */
-			return -1;
-
-		mask  |= CNST_IFM_MASK;
-		value |= CNST_IFM_VAL(event >> EVENT_IFM_SHIFT);
-	}
-
-	/*
-	 * All events must agree on EBB, either all request it or none.
-	 * EBB events are pinned & exclusive, so this should never actually
-	 * hit, but we leave it as a fallback in case.
-	 */
-	mask  |= CNST_EBB_VAL(ebb);
-	value |= CNST_EBB_MASK;
-
-	*maskp = mask;
-	*valp = value;
-
-	return 0;
-}
-
-static int power8_compute_mmcr(u64 event[], int n_ev,
-			       unsigned int hwc[], unsigned long mmcr[],
-			       struct perf_event *pevents[])
-{
-	unsigned long mmcra, mmcr1, mmcr2, unit, combine, psel, cache, val;
-	unsigned int pmc, pmc_inuse;
-	int i;
-
-	pmc_inuse = 0;
-
-	/* First pass to count resource use */
-	for (i = 0; i < n_ev; ++i) {
-		pmc = (event[i] >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK;
-		if (pmc)
-			pmc_inuse |= 1 << pmc;
-	}
-
-	/* In continuous sampling mode, update SDAR on TLB miss */
-	mmcra = MMCRA_SDAR_MODE_TLB;
-	mmcr1 = mmcr2 = 0;
-
-	/* Second pass: assign PMCs, set all MMCR1 fields */
-	for (i = 0; i < n_ev; ++i) {
-		pmc     = (event[i] >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK;
-		unit    = (event[i] >> EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK;
-		combine = (event[i] >> EVENT_COMBINE_SHIFT) & EVENT_COMBINE_MASK;
-		psel    =  event[i] & EVENT_PSEL_MASK;
-
-		if (!pmc) {
-			for (pmc = 1; pmc <= 4; ++pmc) {
-				if (!(pmc_inuse & (1 << pmc)))
-					break;
-			}
-
-			pmc_inuse |= 1 << pmc;
-		}
-
-		if (pmc <= 4) {
-			mmcr1 |= unit << MMCR1_UNIT_SHIFT(pmc);
-			mmcr1 |= combine << MMCR1_COMBINE_SHIFT(pmc);
-			mmcr1 |= psel << MMCR1_PMCSEL_SHIFT(pmc);
-		}
-
-		if (event[i] & EVENT_IS_L1) {
-			cache = event[i] >> EVENT_CACHE_SEL_SHIFT;
-			mmcr1 |= (cache & 1) << MMCR1_IC_QUAL_SHIFT;
-			cache >>= 1;
-			mmcr1 |= (cache & 1) << MMCR1_DC_QUAL_SHIFT;
-		}
-
-		if (event[i] & EVENT_IS_MARKED) {
-			mmcra |= MMCRA_SAMPLE_ENABLE;
-
-			val = (event[i] >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK;
-			if (val) {
-				mmcra |= (val &  3) << MMCRA_SAMP_MODE_SHIFT;
-				mmcra |= (val >> 2) << MMCRA_SAMP_ELIG_SHIFT;
-			}
-		}
-
-		/*
-		 * PM_MRK_FAB_RSP_MATCH and PM_MRK_FAB_RSP_MATCH_CYC,
-		 * the threshold bits are used for the match value.
-		 */
-		if (event_is_fab_match(event[i])) {
-			mmcr1 |= ((event[i] >> EVENT_THR_CTL_SHIFT) &
-				  EVENT_THR_CTL_MASK) << MMCR1_FAB_SHIFT;
-		} else {
-			val = (event[i] >> EVENT_THR_CTL_SHIFT) & EVENT_THR_CTL_MASK;
-			mmcra |= val << MMCRA_THR_CTL_SHIFT;
-			val = (event[i] >> EVENT_THR_SEL_SHIFT) & EVENT_THR_SEL_MASK;
-			mmcra |= val << MMCRA_THR_SEL_SHIFT;
-			val = (event[i] >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK;
-			mmcra |= val << MMCRA_THR_CMP_SHIFT;
-		}
-
-		if (event[i] & EVENT_WANTS_BHRB) {
-			val = (event[i] >> EVENT_IFM_SHIFT) & EVENT_IFM_MASK;
-			mmcra |= val << MMCRA_IFM_SHIFT;
-		}
-
-		if (pevents[i]->attr.exclude_user)
-			mmcr2 |= MMCR2_FCP(pmc);
-
-		if (pevents[i]->attr.exclude_hv)
-			mmcr2 |= MMCR2_FCH(pmc);
-
-		if (pevents[i]->attr.exclude_kernel) {
-			if (cpu_has_feature(CPU_FTR_HVMODE))
-				mmcr2 |= MMCR2_FCH(pmc);
-			else
-				mmcr2 |= MMCR2_FCS(pmc);
-		}
-
-		hwc[i] = pmc - 1;
-	}
-
-	/* Return MMCRx values */
-	mmcr[0] = 0;
-
-	/* pmc_inuse is 1-based */
-	if (pmc_inuse & 2)
-		mmcr[0] = MMCR0_PMC1CE;
-
-	if (pmc_inuse & 0x7c)
-		mmcr[0] |= MMCR0_PMCjCE;
-
-	/* If we're not using PMC 5 or 6, freeze them */
-	if (!(pmc_inuse & 0x60))
-		mmcr[0] |= MMCR0_FC56;
-
-	mmcr[1] = mmcr1;
-	mmcr[2] = mmcra;
-	mmcr[3] = mmcr2;
-
-	return 0;
-}
-
-#define MAX_ALT	2
-
 /* Table of alternatives, sorted by column 0 */
 static const unsigned int event_alternatives[][MAX_ALT] = {
 	{ PM_MRK_ST_CMPL,		PM_MRK_ST_CMPL_ALT },
@@ -567,12 +110,6 @@ static int power8_get_alternatives(u64 event, unsigned int flags, u64 alt[])
 	return num_alt;
 }
 
-static void power8_disable_pmc(unsigned int pmc, unsigned long mmcr[])
-{
-	if (pmc <= 3)
-		mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SHIFT(pmc + 1));
-}
-
 GENERIC_EVENT_ATTR(cpu-cycles,			PM_CYC);
 GENERIC_EVENT_ATTR(stalled-cycles-frontend,	PM_GCT_NOSLOT_CYC);
 GENERIC_EVENT_ATTR(stalled-cycles-backend,	PM_CMPLU_STALL);
@@ -841,16 +378,16 @@ static int power8_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 
 static struct power_pmu power8_pmu = {
 	.name			= "POWER8",
-	.n_counter		= 6,
+	.n_counter		= MAX_PMU_COUNTERS,
 	.max_alternatives	= MAX_ALT + 1,
-	.add_fields		= POWER8_ADD_FIELDS,
-	.test_adder		= POWER8_TEST_ADDER,
-	.compute_mmcr		= power8_compute_mmcr,
+	.add_fields		= ISA207_ADD_FIELDS,
+	.test_adder		= ISA207_TEST_ADDER,
+	.compute_mmcr		= isa207_compute_mmcr,
 	.config_bhrb		= power8_config_bhrb,
 	.bhrb_filter_map	= power8_bhrb_filter_map,
-	.get_constraint		= power8_get_constraint,
+	.get_constraint		= isa207_get_constraint,
 	.get_alternatives	= power8_get_alternatives,
-	.disable_pmc		= power8_disable_pmc,
+	.disable_pmc		= isa207_disable_pmc,
 	.flags			= PPMU_HAS_SIER | PPMU_ARCH_207S,
 	.n_generic		= ARRAY_SIZE(power8_generic_events),
 	.generic_events		= power8_generic_events,
arch/powerpc/perf/power9-events-list.h (new file, 55 lines)
@@ -0,0 +1,55 @@
/*
 * Performance counter support for POWER9 processors.
 *
 * Copyright 2016 Madhavan Srinivasan, IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

/*
 * Power9 event codes.
 */
EVENT(PM_CYC,					0x0001e)
EVENT(PM_ICT_NOSLOT_CYC,			0x100f8)
EVENT(PM_CMPLU_STALL,				0x1e054)
EVENT(PM_INST_CMPL,				0x00002)
EVENT(PM_BRU_CMPL,				0x40060)
EVENT(PM_BR_MPRED_CMPL,				0x400f6)

/* All L1 D cache load references counted at finish, gated by reject */
EVENT(PM_LD_REF_L1,				0x100fc)
/* Load Missed L1 */
EVENT(PM_LD_MISS_L1_FIN,			0x2c04e)
/* Store Missed L1 */
EVENT(PM_ST_MISS_L1,				0x300f0)
/* L1 cache data prefetches */
EVENT(PM_L1_PREF,				0x20054)
/* Instruction fetches from L1 */
EVENT(PM_INST_FROM_L1,				0x04080)
/* Demand iCache Miss */
EVENT(PM_L1_ICACHE_MISS,			0x200fd)
/* Instruction Demand sectors wriittent into IL1 */
EVENT(PM_L1_DEMAND_WRITE,			0x0408c)
/* Instruction prefetch written into IL1 */
EVENT(PM_IC_PREF_WRITE,				0x0408e)
/* The data cache was reloaded from local core's L3 due to a demand load */
EVENT(PM_DATA_FROM_L3,				0x4c042)
/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */
EVENT(PM_DATA_FROM_L3MISS,			0x300fe)
/* All successful D-side store dispatches for this thread */
EVENT(PM_L2_ST,					0x16081)
/* All successful D-side store dispatches for this thread that were L2 Miss */
EVENT(PM_L2_ST_MISS,				0x26081)
/* Total HW L3 prefetches(Load+store) */
EVENT(PM_L3_PREF_ALL,				0x4e052)
/* Data PTEG reload */
EVENT(PM_DTLB_MISS,				0x300fc)
/* ITLB Reloaded */
EVENT(PM_ITLB_MISS,				0x400fc)
/* Run_Instructions */
EVENT(PM_RUN_INST_CMPL,				0x500fa)
/* Run_cycles */
EVENT(PM_RUN_CYC,				0x600f4)
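The list above is written in the X-macro style: it deliberately has no include guard, so each consumer defines EVENT() to whatever expansion it needs before including it (power9-pmu.c below turns each entry into an enumerator). A hypothetical second expansion, shown only to illustrate why the pattern keeps each name and code together in one place:

/* Hypothetical: expand the same list into a name/code table, e.g. for
 * debugging output. Not part of this commit. */
#define EVENT(_name, _code)	{ #_name, _code },
static const struct {
	const char *name;
	unsigned long code;
} p9_event_table[] = {
#include "power9-events-list.h"
};
#undef EVENT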
arch/powerpc/perf/power9-pmu.c (new file, 330 lines)
@@ -0,0 +1,330 @@
|
||||
/*
|
||||
* Performance counter support for POWER9 processors.
|
||||
*
|
||||
* Copyright 2009 Paul Mackerras, IBM Corporation.
|
||||
* Copyright 2013 Michael Ellerman, IBM Corporation.
|
||||
* Copyright 2016 Madhavan Srinivasan, IBM Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the License, or later version.
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) "power9-pmu: " fmt
|
||||
|
||||
#include "isa207-common.h"
|
||||
|
||||
/*
|
||||
* Some power9 event codes.
|
||||
*/
|
||||
#define EVENT(_name, _code) _name = _code,
|
||||
|
||||
enum {
|
||||
#include "power9-events-list.h"
|
||||
};
|
||||
|
||||
#undef EVENT
|
||||
|
||||
/* MMCRA IFM bits - POWER9 */
|
||||
#define POWER9_MMCRA_IFM1 0x0000000040000000UL
|
||||
#define POWER9_MMCRA_IFM2 0x0000000080000000UL
|
||||
#define POWER9_MMCRA_IFM3 0x00000000C0000000UL
|
||||
|
||||
GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC);
|
||||
GENERIC_EVENT_ATTR(stalled-cycles-frontend, PM_ICT_NOSLOT_CYC);
|
||||
GENERIC_EVENT_ATTR(stalled-cycles-backend, PM_CMPLU_STALL);
|
||||
GENERIC_EVENT_ATTR(instructions, PM_INST_CMPL);
|
||||
GENERIC_EVENT_ATTR(branch-instructions, PM_BRU_CMPL);
|
||||
GENERIC_EVENT_ATTR(branch-misses, PM_BR_MPRED_CMPL);
|
||||
GENERIC_EVENT_ATTR(cache-references, PM_LD_REF_L1);
|
||||
GENERIC_EVENT_ATTR(cache-misses, PM_LD_MISS_L1_FIN);
|
||||
|
||||
CACHE_EVENT_ATTR(L1-dcache-load-misses, PM_LD_MISS_L1_FIN);
|
||||
CACHE_EVENT_ATTR(L1-dcache-loads, PM_LD_REF_L1);
|
||||
CACHE_EVENT_ATTR(L1-dcache-prefetches, PM_L1_PREF);
|
||||
CACHE_EVENT_ATTR(L1-dcache-store-misses, PM_ST_MISS_L1);
|
||||
CACHE_EVENT_ATTR(L1-icache-load-misses, PM_L1_ICACHE_MISS);
|
||||
CACHE_EVENT_ATTR(L1-icache-loads, PM_INST_FROM_L1);
|
||||
CACHE_EVENT_ATTR(L1-icache-prefetches, PM_IC_PREF_WRITE);
|
||||
CACHE_EVENT_ATTR(LLC-load-misses, PM_DATA_FROM_L3MISS);
|
||||
CACHE_EVENT_ATTR(LLC-loads, PM_DATA_FROM_L3);
|
||||
CACHE_EVENT_ATTR(LLC-prefetches, PM_L3_PREF_ALL);
|
||||
CACHE_EVENT_ATTR(LLC-store-misses, PM_L2_ST_MISS);
|
||||
CACHE_EVENT_ATTR(LLC-stores, PM_L2_ST);
|
||||
CACHE_EVENT_ATTR(branch-load-misses, PM_BR_MPRED_CMPL);
|
||||
CACHE_EVENT_ATTR(branch-loads, PM_BRU_CMPL);
|
||||
CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS);
|
||||
CACHE_EVENT_ATTR(iTLB-load-misses, PM_ITLB_MISS);
|
||||
|
||||
static struct attribute *power9_events_attr[] = {
|
||||
GENERIC_EVENT_PTR(PM_CYC),
|
||||
GENERIC_EVENT_PTR(PM_ICT_NOSLOT_CYC),
|
||||
GENERIC_EVENT_PTR(PM_CMPLU_STALL),
|
||||
GENERIC_EVENT_PTR(PM_INST_CMPL),
|
||||
GENERIC_EVENT_PTR(PM_BRU_CMPL),
|
||||
GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL),
|
||||
GENERIC_EVENT_PTR(PM_LD_REF_L1),
|
||||
GENERIC_EVENT_PTR(PM_LD_MISS_L1_FIN),
|
||||
CACHE_EVENT_PTR(PM_LD_MISS_L1_FIN),
|
||||
CACHE_EVENT_PTR(PM_LD_REF_L1),
|
||||
CACHE_EVENT_PTR(PM_L1_PREF),
|
||||
CACHE_EVENT_PTR(PM_ST_MISS_L1),
|
||||
CACHE_EVENT_PTR(PM_L1_ICACHE_MISS),
|
||||
CACHE_EVENT_PTR(PM_INST_FROM_L1),
|
||||
CACHE_EVENT_PTR(PM_IC_PREF_WRITE),
|
||||
CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS),
|
||||
CACHE_EVENT_PTR(PM_DATA_FROM_L3),
|
||||
CACHE_EVENT_PTR(PM_L3_PREF_ALL),
|
||||
CACHE_EVENT_PTR(PM_L2_ST_MISS),
|
||||
CACHE_EVENT_PTR(PM_L2_ST),
|
||||
CACHE_EVENT_PTR(PM_BR_MPRED_CMPL),
|
||||
CACHE_EVENT_PTR(PM_BRU_CMPL),
|
||||
CACHE_EVENT_PTR(PM_DTLB_MISS),
|
||||
CACHE_EVENT_PTR(PM_ITLB_MISS),
|
||||
NULL
|
||||
};
|
||||
|
||||
static struct attribute_group power9_pmu_events_group = {
	.name = "events",
	.attrs = power9_events_attr,
};

PMU_FORMAT_ATTR(event,		"config:0-49");
PMU_FORMAT_ATTR(pmcxsel,	"config:0-7");
PMU_FORMAT_ATTR(mark,		"config:8");
PMU_FORMAT_ATTR(combine,	"config:11");
PMU_FORMAT_ATTR(unit,		"config:12-15");
PMU_FORMAT_ATTR(pmc,		"config:16-19");
PMU_FORMAT_ATTR(cache_sel,	"config:20-23");
PMU_FORMAT_ATTR(sample_mode,	"config:24-28");
PMU_FORMAT_ATTR(thresh_sel,	"config:29-31");
PMU_FORMAT_ATTR(thresh_stop,	"config:32-35");
PMU_FORMAT_ATTR(thresh_start,	"config:36-39");
PMU_FORMAT_ATTR(thresh_cmp,	"config:40-49");

static struct attribute *power9_pmu_format_attr[] = {
	&format_attr_event.attr,
	&format_attr_pmcxsel.attr,
	&format_attr_mark.attr,
	&format_attr_combine.attr,
	&format_attr_unit.attr,
	&format_attr_pmc.attr,
	&format_attr_cache_sel.attr,
	&format_attr_sample_mode.attr,
	&format_attr_thresh_sel.attr,
	&format_attr_thresh_stop.attr,
	&format_attr_thresh_start.attr,
	&format_attr_thresh_cmp.attr,
	NULL,
};

static struct attribute_group power9_pmu_format_group = {
	.name = "format",
	.attrs = power9_pmu_format_attr,
};

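/*
 * The "events" and "format" groups registered via attr_groups below
 * surface in sysfs (typically under
 * /sys/bus/event_source/devices/cpu/), letting userspace perf resolve
 * the symbolic names above and encode raw events, e.g.
 * "perf stat -e cpu/event=0x1e/" (event value illustrative).
 */
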
static const struct attribute_group *power9_pmu_attr_groups[] = {
	&power9_pmu_format_group,
	&power9_pmu_events_group,
	NULL,
};

static int power9_generic_events[] = {
	[PERF_COUNT_HW_CPU_CYCLES] =			PM_CYC,
	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =	PM_ICT_NOSLOT_CYC,
	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =	PM_CMPLU_STALL,
	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_INST_CMPL,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =		PM_BRU_CMPL,
	[PERF_COUNT_HW_BRANCH_MISSES] =			PM_BR_MPRED_CMPL,
	[PERF_COUNT_HW_CACHE_REFERENCES] =		PM_LD_REF_L1,
	[PERF_COUNT_HW_CACHE_MISSES] =			PM_LD_MISS_L1_FIN,
};

static u64 power9_bhrb_filter_map(u64 branch_sample_type)
{
	u64 pmu_bhrb_filter = 0;

	/* BHRB and regular PMU events share the same privilege state
	 * filter configuration. BHRB is always recorded along with a
	 * regular PMU event. As the privilege state filter is handled
	 * in the basic PMC configuration of the accompanying regular
	 * PMU event, we ignore any separate BHRB specific request.
	 */

	/* No branch filter requested */
	if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY)
		return pmu_bhrb_filter;

	/* Invalid branch filter options - HW does not support */
	if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
		return -1;

	if (branch_sample_type & PERF_SAMPLE_BRANCH_IND_CALL)
		return -1;

	if (branch_sample_type & PERF_SAMPLE_BRANCH_CALL)
		return -1;

	if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL) {
		pmu_bhrb_filter |= POWER9_MMCRA_IFM1;
		return pmu_bhrb_filter;
	}

	/* Everything else is unsupported */
	return -1;
}

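/*
 * Returning -1 from the u64-valued function above yields an all-ones
 * value; the generic powerpc perf core is expected to compare the
 * result against -1 and reject the event as unsupported rather than
 * program it into MMCRA via power9_config_bhrb() below.
 */
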
static void power9_config_bhrb(u64 pmu_bhrb_filter)
{
	/* Enable BHRB filter in PMU */
	mtspr(SPRN_MMCRA, (mfspr(SPRN_MMCRA) | pmu_bhrb_filter));
}

#define C(x)	PERF_COUNT_HW_CACHE_##x

/*
 * Table of generalized cache-related events.
 * 0 means not supported, -1 means nonsensical, other values
 * are event codes.
 */
static int power9_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
	[ C(L1D) ] = {
		[ C(OP_READ) ] = {
			[ C(RESULT_ACCESS) ] = PM_LD_REF_L1,
			[ C(RESULT_MISS) ] = PM_LD_MISS_L1_FIN,
		},
		[ C(OP_WRITE) ] = {
			[ C(RESULT_ACCESS) ] = 0,
			[ C(RESULT_MISS) ] = PM_ST_MISS_L1,
		},
		[ C(OP_PREFETCH) ] = {
			[ C(RESULT_ACCESS) ] = PM_L1_PREF,
			[ C(RESULT_MISS) ] = 0,
		},
	},
	[ C(L1I) ] = {
		[ C(OP_READ) ] = {
			[ C(RESULT_ACCESS) ] = PM_INST_FROM_L1,
			[ C(RESULT_MISS) ] = PM_L1_ICACHE_MISS,
		},
		[ C(OP_WRITE) ] = {
			[ C(RESULT_ACCESS) ] = PM_L1_DEMAND_WRITE,
			[ C(RESULT_MISS) ] = -1,
		},
		[ C(OP_PREFETCH) ] = {
			[ C(RESULT_ACCESS) ] = PM_IC_PREF_WRITE,
			[ C(RESULT_MISS) ] = 0,
		},
	},
	[ C(LL) ] = {
		[ C(OP_READ) ] = {
			[ C(RESULT_ACCESS) ] = PM_DATA_FROM_L3,
			[ C(RESULT_MISS) ] = PM_DATA_FROM_L3MISS,
		},
		[ C(OP_WRITE) ] = {
			[ C(RESULT_ACCESS) ] = PM_L2_ST,
			[ C(RESULT_MISS) ] = PM_L2_ST_MISS,
		},
		[ C(OP_PREFETCH) ] = {
			[ C(RESULT_ACCESS) ] = PM_L3_PREF_ALL,
			[ C(RESULT_MISS) ] = 0,
		},
	},
	[ C(DTLB) ] = {
		[ C(OP_READ) ] = {
			[ C(RESULT_ACCESS) ] = 0,
			[ C(RESULT_MISS) ] = PM_DTLB_MISS,
		},
		[ C(OP_WRITE) ] = {
			[ C(RESULT_ACCESS) ] = -1,
			[ C(RESULT_MISS) ] = -1,
		},
		[ C(OP_PREFETCH) ] = {
			[ C(RESULT_ACCESS) ] = -1,
			[ C(RESULT_MISS) ] = -1,
		},
	},
	[ C(ITLB) ] = {
		[ C(OP_READ) ] = {
			[ C(RESULT_ACCESS) ] = 0,
			[ C(RESULT_MISS) ] = PM_ITLB_MISS,
		},
		[ C(OP_WRITE) ] = {
			[ C(RESULT_ACCESS) ] = -1,
			[ C(RESULT_MISS) ] = -1,
		},
		[ C(OP_PREFETCH) ] = {
			[ C(RESULT_ACCESS) ] = -1,
			[ C(RESULT_MISS) ] = -1,
		},
	},
	[ C(BPU) ] = {
		[ C(OP_READ) ] = {
			[ C(RESULT_ACCESS) ] = PM_BRU_CMPL,
			[ C(RESULT_MISS) ] = PM_BR_MPRED_CMPL,
		},
		[ C(OP_WRITE) ] = {
			[ C(RESULT_ACCESS) ] = -1,
			[ C(RESULT_MISS) ] = -1,
		},
		[ C(OP_PREFETCH) ] = {
			[ C(RESULT_ACCESS) ] = -1,
			[ C(RESULT_MISS) ] = -1,
		},
	},
	[ C(NODE) ] = {
		[ C(OP_READ) ] = {
			[ C(RESULT_ACCESS) ] = -1,
			[ C(RESULT_MISS) ] = -1,
		},
		[ C(OP_WRITE) ] = {
			[ C(RESULT_ACCESS) ] = -1,
			[ C(RESULT_MISS) ] = -1,
		},
		[ C(OP_PREFETCH) ] = {
			[ C(RESULT_ACCESS) ] = -1,
			[ C(RESULT_MISS) ] = -1,
		},
	},
};

#undef C

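/*
 * For example, a perf "L1-dcache-load-misses" request indexes the
 * table as power9_cache_events[C(L1D)][C(OP_READ)][C(RESULT_MISS)]
 * and resolves to PM_LD_MISS_L1_FIN, matching the sysfs alias
 * exported above.
 */
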
static struct power_pmu power9_pmu = {
	.name			= "POWER9",
	.n_counter		= MAX_PMU_COUNTERS,
	.add_fields		= ISA207_ADD_FIELDS,
	.test_adder		= ISA207_TEST_ADDER,
	.compute_mmcr		= isa207_compute_mmcr,
	.config_bhrb		= power9_config_bhrb,
	.bhrb_filter_map	= power9_bhrb_filter_map,
	.get_constraint		= isa207_get_constraint,
	.disable_pmc		= isa207_disable_pmc,
	.flags			= PPMU_HAS_SIER | PPMU_ARCH_207S,
	.n_generic		= ARRAY_SIZE(power9_generic_events),
	.generic_events		= power9_generic_events,
	.cache_events		= &power9_cache_events,
	.attr_groups		= power9_pmu_attr_groups,
	.bhrb_nr		= 32,
};

static int __init init_power9_pmu(void)
{
	int rc;

	/* Comes from cpu_specs[] */
	if (!cur_cpu_spec->oprofile_cpu_type ||
	    strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power9"))
		return -ENODEV;

	rc = register_power_pmu(&power9_pmu);
	if (rc)
		return rc;

	/* Tell userspace that EBB is supported */
	cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB;

	return 0;
}
early_initcall(init_power9_pmu);
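
/*
 * early_initcall() runs init_power9_pmu() early in boot, before
 * regular initcalls. The PPC_FEATURE2_EBB bit set above should reach
 * userspace through the AT_HWCAP2 auxiliary vector, which is how
 * tools detect Event-Based Branch support.
 */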