Merge tag 'perf-core-2020-08-03' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf event updates from Ingo Molnar:
 "HW support updates:

   - Add uncore support for Intel Comet Lake

   - Add RAPL support for Hygon Fam18h

   - Add Intel "IIO stack to PMON mapping" support on Skylake-SP CPUs,
     which enumerates per device performance counters via sysfs and
     enables the perf stat --iiostat functionality

   - Add support for Intel "Architectural LBRs", which generalized the
     model specific LBR hardware tracing feature into a
     model-independent, architected performance monitoring feature.

     Usage is mostly seamless to tooling, as the pre-existing LBR
     features are kept, but there's a couple of advantages under the
     hood, such as faster context-switching, faster LBR reads, cleaner
     exposure of LBR features to guest kernels, etc.

     ( Since architectural LBRs are supported via XSAVE, there's related
       changes to the x86 FPU code as well. )

 ftrace/perf updates:

   - Add support to add a text poke event to record changes to kernel
     text (i.e. self-modifying code) in order to support tracers like
     Intel PT decoding through jump labels, kprobes and ftrace
     trampolines.

 Misc cleanups, smaller fixes..."

* tag 'perf-core-2020-08-03' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (47 commits)
  perf/x86/rapl: Add Hygon Fam18h RAPL support
  kprobes: Remove unnecessary module_mutex locking from kprobe_optimizer()
  x86/perf: Fix a typo
  perf: <linux/perf_event.h>: drop a duplicated word
  perf/x86/intel/lbr: Support XSAVES for arch LBR read
  perf/x86/intel/lbr: Support XSAVES/XRSTORS for LBR context switch
  x86/fpu/xstate: Add helpers for LBR dynamic supervisor feature
  x86/fpu/xstate: Support dynamic supervisor feature for LBR
  x86/fpu: Use proper mask to replace full instruction mask
  perf/x86: Remove task_ctx_size
  perf/x86/intel/lbr: Create kmem_cache for the LBR context data
  perf/core: Use kmem_cache to allocate the PMU specific data
  perf/core: Factor out functions to allocate/free the task_ctx_data
  perf/x86/intel/lbr: Support Architectural LBR
  perf/x86/intel/lbr: Factor out intel_pmu_store_lbr
  perf/x86/intel/lbr: Factor out rdlbr_all() and wrlbr_all()
  perf/x86/intel/lbr: Mark the {rd,wr}lbr_{to,from} wrappers __always_inline
  perf/x86/intel/lbr: Unify the stored format of LBR information
  perf/x86/intel/lbr: Support LBR_CTL
  perf/x86: Expose CPUID enumeration bits for arch LBR
  ...
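The text poke event described above is opt-in from userspace via a new perf_event_attr bit. As a rough, illustrative sketch only (the helper below is hypothetical; the attr.text_poke bit and PERF_RECORD_TEXT_POKE record type are the ones introduced by this series), a tool could open a system-wide dummy event per CPU to receive the records:

	/* Sketch: request kernel text poke events on one CPU. */
	#include <linux/perf_event.h>
	#include <sys/syscall.h>
	#include <unistd.h>
	#include <string.h>

	static int open_text_poke_event(int cpu)
	{
		struct perf_event_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.size      = sizeof(attr);
		attr.type      = PERF_TYPE_SOFTWARE;
		attr.config    = PERF_COUNT_SW_DUMMY;
		attr.text_poke = 1;	/* deliver PERF_RECORD_TEXT_POKE records */

		/* pid == -1: system-wide on this CPU; needs sufficient privileges */
		return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
	}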
@@ -3,6 +3,7 @@
 
 #include <linux/module.h>
 #include <linux/sched.h>
+#include <linux/perf_event.h>
 #include <linux/mutex.h>
 #include <linux/list.h>
 #include <linux/stringify.h>
@@ -1001,6 +1002,7 @@ struct text_poke_loc {
 	s32 rel32;
 	u8 opcode;
 	const u8 text[POKE_MAX_OPCODE_SIZE];
+	u8 old;
 };
 
 struct bp_patching_desc {
@@ -1168,8 +1170,10 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
 	/*
 	 * First step: add a int3 trap to the address that will be patched.
 	 */
-	for (i = 0; i < nr_entries; i++)
+	for (i = 0; i < nr_entries; i++) {
+		tp[i].old = *(u8 *)text_poke_addr(&tp[i]);
 		text_poke(text_poke_addr(&tp[i]), &int3, INT3_INSN_SIZE);
+	}
 
 	text_poke_sync();
 
@@ -1177,14 +1181,45 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
 	 * Second step: update all but the first byte of the patched range.
 	 */
 	for (do_sync = 0, i = 0; i < nr_entries; i++) {
+		u8 old[POKE_MAX_OPCODE_SIZE] = { tp[i].old, };
 		int len = text_opcode_size(tp[i].opcode);
 
 		if (len - INT3_INSN_SIZE > 0) {
+			memcpy(old + INT3_INSN_SIZE,
+			       text_poke_addr(&tp[i]) + INT3_INSN_SIZE,
+			       len - INT3_INSN_SIZE);
 			text_poke(text_poke_addr(&tp[i]) + INT3_INSN_SIZE,
 				  (const char *)tp[i].text + INT3_INSN_SIZE,
 				  len - INT3_INSN_SIZE);
 			do_sync++;
 		}
 
+		/*
+		 * Emit a perf event to record the text poke, primarily to
+		 * support Intel PT decoding which must walk the executable code
+		 * to reconstruct the trace. The flow up to here is:
+		 *   - write INT3 byte
+		 *   - IPI-SYNC
+		 *   - write instruction tail
+		 * At this point the actual control flow will be through the
+		 * INT3 and handler and not hit the old or new instruction.
+		 * Intel PT outputs FUP/TIP packets for the INT3, so the flow
+		 * can still be decoded. Subsequently:
+		 *   - emit RECORD_TEXT_POKE with the new instruction
+		 *   - IPI-SYNC
+		 *   - write first byte
+		 *   - IPI-SYNC
+		 * So before the text poke event timestamp, the decoder will see
+		 * either the old instruction flow or FUP/TIP of INT3. After the
+		 * text poke event timestamp, the decoder will see either the
+		 * new instruction flow or FUP/TIP of INT3. Thus decoders can
+		 * use the timestamp as the point at which to modify the
+		 * executable code.
+		 * The old instruction is recorded so that the event can be
+		 * processed forwards or backwards.
+		 */
+		perf_event_text_poke(text_poke_addr(&tp[i]), old, len,
+				     tp[i].text, len);
 	}
 
 	if (do_sync) {
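Each perf_event_text_poke() call above turns into a PERF_RECORD_TEXT_POKE record carrying both the old and the new bytes, so a decoder can apply the change at the record's timestamp whether it walks the trace forwards or backwards. Roughly, the record looks like this (struct name illustrative; fields as documented in the uapi header added by the series):

	struct perf_record_text_poke {		/* PERF_RECORD_TEXT_POKE (sketch) */
		struct perf_event_header header;
		__u64	addr;			/* address of the poked location */
		__u16	old_len;		/* number of old bytes */
		__u16	new_len;		/* number of new bytes */
		__u8	bytes[];		/* old_len old bytes, then new_len new bytes */
						/* ...followed by the usual sample_id fields */
	};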
@@ -82,6 +82,45 @@ bool irq_fpu_usable(void)
 }
 EXPORT_SYMBOL(irq_fpu_usable);
 
+/*
+ * These must be called with preempt disabled. Returns
+ * 'true' if the FPU state is still intact and we can
+ * keep registers active.
+ *
+ * The legacy FNSAVE instruction cleared all FPU state
+ * unconditionally, so registers are essentially destroyed.
+ * Modern FPU state can be kept in registers, if there are
+ * no pending FP exceptions.
+ */
+int copy_fpregs_to_fpstate(struct fpu *fpu)
+{
+	if (likely(use_xsave())) {
+		copy_xregs_to_kernel(&fpu->state.xsave);
+
+		/*
+		 * AVX512 state is tracked here because its use is
+		 * known to slow the max clock speed of the core.
+		 */
+		if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_AVX512)
+			fpu->avx512_timestamp = jiffies;
+		return 1;
+	}
+
+	if (likely(use_fxsr())) {
+		copy_fxregs_to_kernel(fpu);
+		return 1;
+	}
+
+	/*
+	 * Legacy FPU register saving, FNSAVE always clears FPU registers,
+	 * so we have to mark them inactive:
+	 */
+	asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave));
+
+	return 0;
+}
+EXPORT_SYMBOL(copy_fpregs_to_fpstate);
+
 void kernel_fpu_begin(void)
 {
 	preempt_disable();
@@ -233,8 +233,10 @@ void fpu__init_cpu_xstate(void)
 	/*
 	 * MSR_IA32_XSS sets supervisor states managed by XSAVES.
 	 */
-	if (boot_cpu_has(X86_FEATURE_XSAVES))
-		wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
+	if (boot_cpu_has(X86_FEATURE_XSAVES)) {
+		wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() |
+				     xfeatures_mask_dynamic());
+	}
 }
 
 static bool xfeature_enabled(enum xfeature xfeature)
@@ -486,7 +488,7 @@ static int xfeature_uncompacted_offset(int xfeature_nr)
 	return ebx;
 }
 
-static int xfeature_size(int xfeature_nr)
+int xfeature_size(int xfeature_nr)
 {
 	u32 eax, ebx, ecx, edx;
 
@@ -598,7 +600,8 @@ static void check_xstate_against_struct(int nr)
 	 */
 	if ((nr < XFEATURE_YMM) ||
 	    (nr >= XFEATURE_MAX) ||
-	    (nr == XFEATURE_PT_UNIMPLEMENTED_SO_FAR)) {
+	    (nr == XFEATURE_PT_UNIMPLEMENTED_SO_FAR) ||
+	    ((nr >= XFEATURE_RSRVD_COMP_10) && (nr <= XFEATURE_LBR))) {
 		WARN_ONCE(1, "no structure for xstate: %d\n", nr);
 		XSTATE_WARN_ON(1);
 	}
@@ -847,8 +850,10 @@ void fpu__resume_cpu(void)
 	 * Restore IA32_XSS. The same CPUID bit enumerates support
 	 * of XSAVES and MSR_IA32_XSS.
 	 */
-	if (boot_cpu_has(X86_FEATURE_XSAVES))
-		wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
+	if (boot_cpu_has(X86_FEATURE_XSAVES)) {
+		wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() |
+				     xfeatures_mask_dynamic());
+	}
 }
 
 /*
@@ -1356,6 +1361,78 @@ void copy_supervisor_to_kernel(struct xregs_state *xstate)
 	}
 }
 
+/**
+ * copy_dynamic_supervisor_to_kernel() - Save dynamic supervisor states to
+ *                                       an xsave area
+ * @xstate: A pointer to an xsave area
+ * @mask: Represent the dynamic supervisor features saved into the xsave area
+ *
+ * Only the dynamic supervisor states sets in the mask are saved into the xsave
+ * area (See the comment in XFEATURE_MASK_DYNAMIC for the details of dynamic
+ * supervisor feature). Besides the dynamic supervisor states, the legacy
+ * region and XSAVE header are also saved into the xsave area. The supervisor
+ * features in the XFEATURE_MASK_SUPERVISOR_SUPPORTED and
+ * XFEATURE_MASK_SUPERVISOR_UNSUPPORTED are not saved.
+ *
+ * The xsave area must be 64-bytes aligned.
+ */
+void copy_dynamic_supervisor_to_kernel(struct xregs_state *xstate, u64 mask)
+{
+	u64 dynamic_mask = xfeatures_mask_dynamic() & mask;
+	u32 lmask, hmask;
+	int err;
+
+	if (WARN_ON_FPU(!boot_cpu_has(X86_FEATURE_XSAVES)))
+		return;
+
+	if (WARN_ON_FPU(!dynamic_mask))
+		return;
+
+	lmask = dynamic_mask;
+	hmask = dynamic_mask >> 32;
+
+	XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
+
+	/* Should never fault when copying to a kernel buffer */
+	WARN_ON_FPU(err);
+}
+
+/**
+ * copy_kernel_to_dynamic_supervisor() - Restore dynamic supervisor states from
+ *                                       an xsave area
+ * @xstate: A pointer to an xsave area
+ * @mask: Represent the dynamic supervisor features restored from the xsave area
+ *
+ * Only the dynamic supervisor states sets in the mask are restored from the
+ * xsave area (See the comment in XFEATURE_MASK_DYNAMIC for the details of
+ * dynamic supervisor feature). Besides the dynamic supervisor states, the
+ * legacy region and XSAVE header are also restored from the xsave area. The
+ * supervisor features in the XFEATURE_MASK_SUPERVISOR_SUPPORTED and
+ * XFEATURE_MASK_SUPERVISOR_UNSUPPORTED are not restored.
+ *
+ * The xsave area must be 64-bytes aligned.
+ */
+void copy_kernel_to_dynamic_supervisor(struct xregs_state *xstate, u64 mask)
+{
+	u64 dynamic_mask = xfeatures_mask_dynamic() & mask;
+	u32 lmask, hmask;
+	int err;
+
+	if (WARN_ON_FPU(!boot_cpu_has(X86_FEATURE_XSAVES)))
+		return;
+
+	if (WARN_ON_FPU(!dynamic_mask))
+		return;
+
+	lmask = dynamic_mask;
+	hmask = dynamic_mask >> 32;
+
+	XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
+
+	/* Should never fault when copying from a kernel buffer */
+	WARN_ON_FPU(err);
+}
+
 #ifdef CONFIG_PROC_PID_ARCH_STATUS
 /*
  * Report the amount of time elapsed in millisecond since last AVX512
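These two helpers are what the arch LBR code uses to XSAVES/XRSTORS only the LBR component into a per-task buffer on context switch (see "perf/x86/intel/lbr: Support XSAVES/XRSTORS for LBR context switch" in the shortlog). A minimal sketch of the intended call pattern, with the wrapper functions below being hypothetical names:

	/* Sketch: save/restore only the LBR xstate component across a task switch. */
	static void lbr_save_ctx(struct x86_perf_task_context_arch_lbr_xsave *ctx)
	{
		copy_dynamic_supervisor_to_kernel(&ctx->xsave, XFEATURE_MASK_LBR);
	}

	static void lbr_restore_ctx(struct x86_perf_task_context_arch_lbr_xsave *ctx)
	{
		copy_kernel_to_dynamic_supervisor(&ctx->xsave, XFEATURE_MASK_LBR);
	}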
@@ -33,6 +33,7 @@
 #include <linux/hardirq.h>
 #include <linux/preempt.h>
 #include <linux/sched/debug.h>
+#include <linux/perf_event.h>
 #include <linux/extable.h>
 #include <linux/kdebug.h>
 #include <linux/kallsyms.h>
@@ -472,6 +473,9 @@ static int arch_copy_kprobe(struct kprobe *p)
 	/* Also, displacement change doesn't affect the first byte */
 	p->opcode = buf[0];
 
+	p->ainsn.tp_len = len;
+	perf_event_text_poke(p->ainsn.insn, NULL, 0, buf, len);
+
 	/* OK, write back the instruction(s) into ROX insn buffer */
 	text_poke(p->ainsn.insn, buf, len);
 
@@ -503,12 +507,18 @@ int arch_prepare_kprobe(struct kprobe *p)
 
 void arch_arm_kprobe(struct kprobe *p)
 {
-	text_poke(p->addr, ((unsigned char []){INT3_INSN_OPCODE}), 1);
+	u8 int3 = INT3_INSN_OPCODE;
+
+	text_poke(p->addr, &int3, 1);
 	text_poke_sync();
+	perf_event_text_poke(p->addr, &p->opcode, 1, &int3, 1);
 }
 
 void arch_disarm_kprobe(struct kprobe *p)
 {
+	u8 int3 = INT3_INSN_OPCODE;
+
+	perf_event_text_poke(p->addr, &int3, 1, &p->opcode, 1);
 	text_poke(p->addr, &p->opcode, 1);
 	text_poke_sync();
 }
@@ -516,6 +526,9 @@ void arch_disarm_kprobe(struct kprobe *p)
 void arch_remove_kprobe(struct kprobe *p)
 {
 	if (p->ainsn.insn) {
+		/* Record the perf event before freeing the slot */
+		perf_event_text_poke(p->ainsn.insn, p->ainsn.insn,
+				     p->ainsn.tp_len, NULL, 0);
 		free_insn_slot(p->ainsn.insn, p->ainsn.boostable);
 		p->ainsn.insn = NULL;
 	}
@@ -6,6 +6,7 @@
  * Copyright (C) Hitachi Ltd., 2012
  */
 #include <linux/kprobes.h>
+#include <linux/perf_event.h>
 #include <linux/ptrace.h>
 #include <linux/string.h>
 #include <linux/slab.h>
@@ -352,8 +353,15 @@ int arch_within_optimized_kprobe(struct optimized_kprobe *op,
 static
 void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
 {
-	if (op->optinsn.insn) {
-		free_optinsn_slot(op->optinsn.insn, dirty);
+	u8 *slot = op->optinsn.insn;
+	if (slot) {
+		int len = TMPL_END_IDX + op->optinsn.size + JMP32_INSN_SIZE;
+
+		/* Record the perf event before freeing the slot */
+		if (dirty)
+			perf_event_text_poke(slot, slot, len, NULL, 0);
+
+		free_optinsn_slot(slot, dirty);
 		op->optinsn.insn = NULL;
 		op->optinsn.size = 0;
 	}
@@ -424,8 +432,15 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
 			   (u8 *)op->kp.addr + op->optinsn.size);
 	len += JMP32_INSN_SIZE;
 
+	/*
+	 * Note len = TMPL_END_IDX + op->optinsn.size + JMP32_INSN_SIZE is also
+	 * used in __arch_remove_optimized_kprobe().
+	 */
+
 	/* We have to use text_poke() for instruction buffer because it is RO */
+	perf_event_text_poke(slot, NULL, 0, buf, len);
 	text_poke(slot, buf, len);
+
 	ret = 0;
 out:
 	kfree(buf);
@@ -477,10 +492,23 @@ void arch_optimize_kprobes(struct list_head *oplist)
  */
 void arch_unoptimize_kprobe(struct optimized_kprobe *op)
 {
-	arch_arm_kprobe(&op->kp);
-	text_poke(op->kp.addr + INT3_INSN_SIZE,
-		  op->optinsn.copied_insn, DISP32_SIZE);
+	u8 new[JMP32_INSN_SIZE] = { INT3_INSN_OPCODE, };
+	u8 old[JMP32_INSN_SIZE];
+	u8 *addr = op->kp.addr;
+
+	memcpy(old, op->kp.addr, JMP32_INSN_SIZE);
+	memcpy(new + INT3_INSN_SIZE,
+	       op->optinsn.copied_insn,
+	       JMP32_INSN_SIZE - INT3_INSN_SIZE);
+
+	text_poke(addr, new, INT3_INSN_SIZE);
 	text_poke_sync();
+	text_poke(addr + INT3_INSN_SIZE,
+		  new + INT3_INSN_SIZE,
+		  JMP32_INSN_SIZE - INT3_INSN_SIZE);
+	text_poke_sync();
+
+	perf_event_text_poke(op->kp.addr, old, JMP32_INSN_SIZE, new, JMP32_INSN_SIZE);
 }
 
 /*