Merge tag 'perf-core-2020-08-03' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf event updates from Ingo Molnar:
 "HW support updates:

   - Add uncore support for Intel Comet Lake

   - Add RAPL support for Hygon Fam18h

   - Add Intel "IIO stack to PMON mapping" support on Skylake-SP CPUs,
     which enumerates per device performance counters via sysfs and
     enables the perf stat --iiostat functionality

   - Add support for Intel "Architectural LBRs", which generalized the
     model specific LBR hardware tracing feature into a
     model-independent, architected performance monitoring feature.

     Usage is mostly seamless to tooling, as the pre-existing LBR
     features are kept, but there's a couple of advantages under the
     hood, such as faster context-switching, faster LBR reads, cleaner
     exposure of LBR features to guest kernels, etc.

     ( Since architectural LBRs are supported via XSAVE, there's related
       changes to the x86 FPU code as well. )

  ftrace/perf updates:

   - Add support to add a text poke event to record changes to kernel
     text (i.e. self-modifying code) in order to support tracers like
     Intel PT decoding through jump labels, kprobes and ftrace
     trampolines.

  Misc cleanups, smaller fixes..."

* tag 'perf-core-2020-08-03' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (47 commits)
  perf/x86/rapl: Add Hygon Fam18h RAPL support
  kprobes: Remove unnecessary module_mutex locking from kprobe_optimizer()
  x86/perf: Fix a typo
  perf: <linux/perf_event.h>: drop a duplicated word
  perf/x86/intel/lbr: Support XSAVES for arch LBR read
  perf/x86/intel/lbr: Support XSAVES/XRSTORS for LBR context switch
  x86/fpu/xstate: Add helpers for LBR dynamic supervisor feature
  x86/fpu/xstate: Support dynamic supervisor feature for LBR
  x86/fpu: Use proper mask to replace full instruction mask
  perf/x86: Remove task_ctx_size
  perf/x86/intel/lbr: Create kmem_cache for the LBR context data
  perf/core: Use kmem_cache to allocate the PMU specific data
  perf/core: Factor out functions to allocate/free the task_ctx_data
  perf/x86/intel/lbr: Support Architectural LBR
  perf/x86/intel/lbr: Factor out intel_pmu_store_lbr
  perf/x86/intel/lbr: Factor out rdlbr_all() and wrlbr_all()
  perf/x86/intel/lbr: Mark the {rd,wr}lbr_{to,from} wrappers __always_inline
  perf/x86/intel/lbr: Unify the stored format of LBR information
  perf/x86/intel/lbr: Support LBR_CTL
  perf/x86: Expose CPUID enumeration bits for arch LBR
  ...
This commit is contained in:
Linus Torvalds
2020-08-03 14:51:09 -07:00
32 changed files with 1943 additions and 280 deletions

View File

@@ -394,6 +394,7 @@ static atomic_t nr_switch_events __read_mostly;
static atomic_t nr_ksymbol_events __read_mostly;
static atomic_t nr_bpf_events __read_mostly;
static atomic_t nr_cgroup_events __read_mostly;
static atomic_t nr_text_poke_events __read_mostly;
static LIST_HEAD(pmus);
static DEFINE_MUTEX(pmus_lock);
@@ -1237,12 +1238,26 @@ static void get_ctx(struct perf_event_context *ctx)
refcount_inc(&ctx->refcount);
}
static void *alloc_task_ctx_data(struct pmu *pmu)
{
if (pmu->task_ctx_cache)
return kmem_cache_zalloc(pmu->task_ctx_cache, GFP_KERNEL);
return NULL;
}
static void free_task_ctx_data(struct pmu *pmu, void *task_ctx_data)
{
if (pmu->task_ctx_cache && task_ctx_data)
kmem_cache_free(pmu->task_ctx_cache, task_ctx_data);
}
static void free_ctx(struct rcu_head *head)
{
struct perf_event_context *ctx;
ctx = container_of(head, struct perf_event_context, rcu_head);
kfree(ctx->task_ctx_data);
free_task_ctx_data(ctx->pmu, ctx->task_ctx_data);
kfree(ctx);
}
@@ -4470,7 +4485,7 @@ find_get_context(struct pmu *pmu, struct task_struct *task,
goto errout;
if (event->attach_state & PERF_ATTACH_TASK_DATA) {
task_ctx_data = kzalloc(pmu->task_ctx_size, GFP_KERNEL);
task_ctx_data = alloc_task_ctx_data(pmu);
if (!task_ctx_data) {
err = -ENOMEM;
goto errout;
@@ -4528,11 +4543,11 @@ retry:
}
}
kfree(task_ctx_data);
free_task_ctx_data(pmu, task_ctx_data);
return ctx;
errout:
kfree(task_ctx_data);
free_task_ctx_data(pmu, task_ctx_data);
return ERR_PTR(err);
}
@@ -4575,7 +4590,7 @@ static bool is_sb_event(struct perf_event *event)
if (attr->mmap || attr->mmap_data || attr->mmap2 ||
attr->comm || attr->comm_exec ||
attr->task || attr->ksymbol ||
attr->context_switch ||
attr->context_switch || attr->text_poke ||
attr->bpf_event)
return true;
return false;
@@ -4651,6 +4666,8 @@ static void unaccount_event(struct perf_event *event)
atomic_dec(&nr_ksymbol_events);
if (event->attr.bpf_event)
atomic_dec(&nr_bpf_events);
if (event->attr.text_poke)
atomic_dec(&nr_text_poke_events);
if (dec) {
if (!atomic_add_unless(&perf_sched_count, -1, 1))
@@ -8628,6 +8645,89 @@ void perf_event_bpf_event(struct bpf_prog *prog,
perf_iterate_sb(perf_event_bpf_output, &bpf_event, NULL);
}
struct perf_text_poke_event {
const void *old_bytes;
const void *new_bytes;
size_t pad;
u16 old_len;
u16 new_len;
struct {
struct perf_event_header header;
u64 addr;
} event_id;
};
static int perf_event_text_poke_match(struct perf_event *event)
{
return event->attr.text_poke;
}
static void perf_event_text_poke_output(struct perf_event *event, void *data)
{
struct perf_text_poke_event *text_poke_event = data;
struct perf_output_handle handle;
struct perf_sample_data sample;
u64 padding = 0;
int ret;
if (!perf_event_text_poke_match(event))
return;
perf_event_header__init_id(&text_poke_event->event_id.header, &sample, event);
ret = perf_output_begin(&handle, event, text_poke_event->event_id.header.size);
if (ret)
return;
perf_output_put(&handle, text_poke_event->event_id);
perf_output_put(&handle, text_poke_event->old_len);
perf_output_put(&handle, text_poke_event->new_len);
__output_copy(&handle, text_poke_event->old_bytes, text_poke_event->old_len);
__output_copy(&handle, text_poke_event->new_bytes, text_poke_event->new_len);
if (text_poke_event->pad)
__output_copy(&handle, &padding, text_poke_event->pad);
perf_event__output_id_sample(event, &handle, &sample);
perf_output_end(&handle);
}
void perf_event_text_poke(const void *addr, const void *old_bytes,
size_t old_len, const void *new_bytes, size_t new_len)
{
struct perf_text_poke_event text_poke_event;
size_t tot, pad;
if (!atomic_read(&nr_text_poke_events))
return;
tot = sizeof(text_poke_event.old_len) + old_len;
tot += sizeof(text_poke_event.new_len) + new_len;
pad = ALIGN(tot, sizeof(u64)) - tot;
text_poke_event = (struct perf_text_poke_event){
.old_bytes = old_bytes,
.new_bytes = new_bytes,
.pad = pad,
.old_len = old_len,
.new_len = new_len,
.event_id = {
.header = {
.type = PERF_RECORD_TEXT_POKE,
.misc = PERF_RECORD_MISC_KERNEL,
.size = sizeof(text_poke_event.event_id) + tot + pad,
},
.addr = (unsigned long)addr,
},
};
perf_iterate_sb(perf_event_text_poke_output, &text_poke_event, NULL);
}
void perf_event_itrace_started(struct perf_event *event)
{
event->attach_state |= PERF_ATTACH_ITRACE;
@@ -10945,6 +11045,8 @@ static void account_event(struct perf_event *event)
atomic_inc(&nr_ksymbol_events);
if (event->attr.bpf_event)
atomic_inc(&nr_bpf_events);
if (event->attr.text_poke)
atomic_inc(&nr_text_poke_events);
if (inc) {
/*
@@ -12409,8 +12511,7 @@ inherit_event(struct perf_event *parent_event,
!child_ctx->task_ctx_data) {
struct pmu *pmu = child_event->pmu;
child_ctx->task_ctx_data = kzalloc(pmu->task_ctx_size,
GFP_KERNEL);
child_ctx->task_ctx_data = alloc_task_ctx_data(pmu);
if (!child_ctx->task_ctx_data) {
free_event(child_event);
return ERR_PTR(-ENOMEM);

View File

@@ -24,6 +24,7 @@
#include <linux/slab.h>
#include <linux/filter.h>
#include <linux/ftrace.h>
#include <linux/kprobes.h>
#include <linux/compiler.h>
/*
@@ -437,6 +438,7 @@ struct kallsym_iter {
loff_t pos_arch_end;
loff_t pos_mod_end;
loff_t pos_ftrace_mod_end;
loff_t pos_bpf_end;
unsigned long value;
unsigned int nameoff; /* If iterating in core kernel symbols. */
char type;
@@ -480,6 +482,11 @@ static int get_ksymbol_mod(struct kallsym_iter *iter)
return 1;
}
/*
* ftrace_mod_get_kallsym() may also get symbols for pages allocated for ftrace
* purposes. In that case "__builtin__ftrace" is used as a module name, even
* though "__builtin__ftrace" is not a module.
*/
static int get_ksymbol_ftrace_mod(struct kallsym_iter *iter)
{
int ret = ftrace_mod_get_kallsym(iter->pos - iter->pos_mod_end,
@@ -496,11 +503,33 @@ static int get_ksymbol_ftrace_mod(struct kallsym_iter *iter)
static int get_ksymbol_bpf(struct kallsym_iter *iter)
{
int ret;
strlcpy(iter->module_name, "bpf", MODULE_NAME_LEN);
iter->exported = 0;
return bpf_get_kallsym(iter->pos - iter->pos_ftrace_mod_end,
&iter->value, &iter->type,
iter->name) < 0 ? 0 : 1;
ret = bpf_get_kallsym(iter->pos - iter->pos_ftrace_mod_end,
&iter->value, &iter->type,
iter->name);
if (ret < 0) {
iter->pos_bpf_end = iter->pos;
return 0;
}
return 1;
}
/*
* This uses "__builtin__kprobes" as a module name for symbols for pages
* allocated for kprobes' purposes, even though "__builtin__kprobes" is not a
* module.
*/
static int get_ksymbol_kprobe(struct kallsym_iter *iter)
{
strlcpy(iter->module_name, "__builtin__kprobes", MODULE_NAME_LEN);
iter->exported = 0;
return kprobe_get_kallsym(iter->pos - iter->pos_bpf_end,
&iter->value, &iter->type,
iter->name) < 0 ? 0 : 1;
}
/* Returns space to next name. */
@@ -527,6 +556,7 @@ static void reset_iter(struct kallsym_iter *iter, loff_t new_pos)
iter->pos_arch_end = 0;
iter->pos_mod_end = 0;
iter->pos_ftrace_mod_end = 0;
iter->pos_bpf_end = 0;
}
}
@@ -551,7 +581,11 @@ static int update_iter_mod(struct kallsym_iter *iter, loff_t pos)
get_ksymbol_ftrace_mod(iter))
return 1;
return get_ksymbol_bpf(iter);
if ((!iter->pos_bpf_end || iter->pos_bpf_end > pos) &&
get_ksymbol_bpf(iter))
return 1;
return get_ksymbol_kprobe(iter);
}
/* Returns false if pos at or past end of file. */

View File

@@ -35,6 +35,7 @@
#include <linux/ftrace.h>
#include <linux/cpu.h>
#include <linux/jump_label.h>
#include <linux/perf_event.h>
#include <asm/sections.h>
#include <asm/cacheflush.h>
@@ -123,6 +124,7 @@ struct kprobe_insn_cache kprobe_insn_slots = {
.mutex = __MUTEX_INITIALIZER(kprobe_insn_slots.mutex),
.alloc = alloc_insn_page,
.free = free_insn_page,
.sym = KPROBE_INSN_PAGE_SYM,
.pages = LIST_HEAD_INIT(kprobe_insn_slots.pages),
.insn_size = MAX_INSN_SIZE,
.nr_garbage = 0,
@@ -188,6 +190,10 @@ kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c)
kip->cache = c;
list_add_rcu(&kip->list, &c->pages);
slot = kip->insns;
/* Record the perf ksymbol register event after adding the page */
perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_OOL, (unsigned long)kip->insns,
PAGE_SIZE, false, c->sym);
out:
mutex_unlock(&c->mutex);
return slot;
@@ -206,6 +212,13 @@ static int collect_one_slot(struct kprobe_insn_page *kip, int idx)
* next time somebody inserts a probe.
*/
if (!list_is_singular(&kip->list)) {
/*
* Record perf ksymbol unregister event before removing
* the page.
*/
perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_OOL,
(unsigned long)kip->insns, PAGE_SIZE, true,
kip->cache->sym);
list_del_rcu(&kip->list);
synchronize_rcu();
kip->cache->free(kip->insns);
@@ -295,12 +308,34 @@ bool __is_insn_slot_addr(struct kprobe_insn_cache *c, unsigned long addr)
return ret;
}
int kprobe_cache_get_kallsym(struct kprobe_insn_cache *c, unsigned int *symnum,
unsigned long *value, char *type, char *sym)
{
struct kprobe_insn_page *kip;
int ret = -ERANGE;
rcu_read_lock();
list_for_each_entry_rcu(kip, &c->pages, list) {
if ((*symnum)--)
continue;
strlcpy(sym, c->sym, KSYM_NAME_LEN);
*type = 't';
*value = (unsigned long)kip->insns;
ret = 0;
break;
}
rcu_read_unlock();
return ret;
}
#ifdef CONFIG_OPTPROBES
/* For optimized_kprobe buffer */
struct kprobe_insn_cache kprobe_optinsn_slots = {
.mutex = __MUTEX_INITIALIZER(kprobe_optinsn_slots.mutex),
.alloc = alloc_insn_page,
.free = free_insn_page,
.sym = KPROBE_OPTINSN_PAGE_SYM,
.pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages),
/* .insn_size is initialized later */
.nr_garbage = 0,
@@ -563,8 +598,6 @@ static void kprobe_optimizer(struct work_struct *work)
mutex_lock(&kprobe_mutex);
cpus_read_lock();
mutex_lock(&text_mutex);
/* Lock modules while optimizing kprobes */
mutex_lock(&module_mutex);
/*
* Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
@@ -589,7 +622,6 @@ static void kprobe_optimizer(struct work_struct *work)
/* Step 4: Free cleaned kprobes after quiesence period */
do_free_cleaned_kprobes();
mutex_unlock(&module_mutex);
mutex_unlock(&text_mutex);
cpus_read_unlock();
@@ -2232,6 +2264,28 @@ static void kprobe_remove_ksym_blacklist(unsigned long entry)
kprobe_remove_area_blacklist(entry, entry + 1);
}
int __weak arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long *value,
char *type, char *sym)
{
return -ERANGE;
}
int kprobe_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
char *sym)
{
#ifdef __ARCH_WANT_KPROBES_INSN_SLOT
if (!kprobe_cache_get_kallsym(&kprobe_insn_slots, &symnum, value, type, sym))
return 0;
#ifdef CONFIG_OPTPROBES
if (!kprobe_cache_get_kallsym(&kprobe_optinsn_slots, &symnum, value, type, sym))
return 0;
#endif
#endif
if (!arch_kprobe_get_kallsym(&symnum, value, type, sym))
return 0;
return -ERANGE;
}
int __init __weak arch_populate_kprobe_blacklist(void)
{
return 0;

View File

@@ -2764,6 +2764,50 @@ void __weak arch_ftrace_trampoline_free(struct ftrace_ops *ops)
{
}
/* List of trace_ops that have allocated trampolines */
static LIST_HEAD(ftrace_ops_trampoline_list);
static void ftrace_add_trampoline_to_kallsyms(struct ftrace_ops *ops)
{
lockdep_assert_held(&ftrace_lock);
list_add_rcu(&ops->list, &ftrace_ops_trampoline_list);
}
static void ftrace_remove_trampoline_from_kallsyms(struct ftrace_ops *ops)
{
lockdep_assert_held(&ftrace_lock);
list_del_rcu(&ops->list);
}
/*
* "__builtin__ftrace" is used as a module name in /proc/kallsyms for symbols
* for pages allocated for ftrace purposes, even though "__builtin__ftrace" is
* not a module.
*/
#define FTRACE_TRAMPOLINE_MOD "__builtin__ftrace"
#define FTRACE_TRAMPOLINE_SYM "ftrace_trampoline"
static void ftrace_trampoline_free(struct ftrace_ops *ops)
{
if (ops && (ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP) &&
ops->trampoline) {
/*
* Record the text poke event before the ksymbol unregister
* event.
*/
perf_event_text_poke((void *)ops->trampoline,
(void *)ops->trampoline,
ops->trampoline_size, NULL, 0);
perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_OOL,
ops->trampoline, ops->trampoline_size,
true, FTRACE_TRAMPOLINE_SYM);
/* Remove from kallsyms after the perf events */
ftrace_remove_trampoline_from_kallsyms(ops);
}
arch_ftrace_trampoline_free(ops);
}
static void ftrace_startup_enable(int command)
{
if (saved_ftrace_func != ftrace_trace_function) {
@@ -2934,7 +2978,7 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command)
synchronize_rcu_tasks();
free_ops:
arch_ftrace_trampoline_free(ops);
ftrace_trampoline_free(ops);
}
return 0;
@@ -6178,6 +6222,27 @@ struct ftrace_mod_map {
unsigned int num_funcs;
};
static int ftrace_get_trampoline_kallsym(unsigned int symnum,
unsigned long *value, char *type,
char *name, char *module_name,
int *exported)
{
struct ftrace_ops *op;
list_for_each_entry_rcu(op, &ftrace_ops_trampoline_list, list) {
if (!op->trampoline || symnum--)
continue;
*value = op->trampoline;
*type = 't';
strlcpy(name, FTRACE_TRAMPOLINE_SYM, KSYM_NAME_LEN);
strlcpy(module_name, FTRACE_TRAMPOLINE_MOD, MODULE_NAME_LEN);
*exported = 0;
return 0;
}
return -ERANGE;
}
#ifdef CONFIG_MODULES
#define next_to_ftrace_page(p) container_of(p, struct ftrace_page, next)
@@ -6514,6 +6579,7 @@ int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *value,
{
struct ftrace_mod_map *mod_map;
struct ftrace_mod_func *mod_func;
int ret;
preempt_disable();
list_for_each_entry_rcu(mod_map, &ftrace_mod_maps, list) {
@@ -6540,8 +6606,10 @@ int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *value,
WARN_ON(1);
break;
}
ret = ftrace_get_trampoline_kallsym(symnum, value, type, name,
module_name, exported);
preempt_enable();
return -ERANGE;
return ret;
}
#else
@@ -6553,6 +6621,18 @@ allocate_ftrace_mod_map(struct module *mod,
{
return NULL;
}
int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *value,
char *type, char *name, char *module_name,
int *exported)
{
int ret;
preempt_disable();
ret = ftrace_get_trampoline_kallsym(symnum, value, type, name,
module_name, exported);
preempt_enable();
return ret;
}
#endif /* CONFIG_MODULES */
struct ftrace_init_func {
@@ -6733,7 +6813,24 @@ void __weak arch_ftrace_update_trampoline(struct ftrace_ops *ops)
static void ftrace_update_trampoline(struct ftrace_ops *ops)
{
unsigned long trampoline = ops->trampoline;
arch_ftrace_update_trampoline(ops);
if (ops->trampoline && ops->trampoline != trampoline &&
(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP)) {
/* Add to kallsyms before the perf events */
ftrace_add_trampoline_to_kallsyms(ops);
perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_OOL,
ops->trampoline, ops->trampoline_size, false,
FTRACE_TRAMPOLINE_SYM);
/*
* Record the perf text poke event after the ksymbol register
* event.
*/
perf_event_text_poke((void *)ops->trampoline, NULL, 0,
(void *)ops->trampoline,
ops->trampoline_size);
}
}
void ftrace_init_trace_array(struct trace_array *tr)