Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf updates from Ingo Molnar:
 "The main kernel side changes were:

   - Modernize the kprobe and uprobe creation/destruction tooling ABIs:

     The existing text based APIs (kprobe_events and uprobe_events in
     tracefs), are naive, limited ABIs in that they require user-space
     to clean up after themselves, which is both difficult and fragile
     if the tool is buggy or exits unexpectedly. In other words they are
     not really suited for modern, robust tooling.

     So introduce a modern, file descriptor based ABI that does not have
     these limitations: introduce the 'perf_kprobe' and 'perf_uprobe'
     PMUs and extend the perf_event_open() syscall to create events with
     a kprobe/uprobe attached to them. These [k,u]probe are associated
     with this file descriptor, so they are not available in tracefs.

     (Song Liu)

   - Intel Cannon Lake CPU support (Harry Pan)

   - Intel PT cleanups (Alexander Shishkin)

   - Improve the performance of pinned/flexible event groups by using RB
     trees (Alexey Budankov)

   - Add PERF_EVENT_IOC_MODIFY_ATTRIBUTES which allows the modification
     of hardware breakpoints, which new ABI variant massively speeds up
     existing tooling that uses hardware breakpoints to instrument (and
     debug) memory usage.

     (Milind Chabbi, Jiri Olsa)

   - Various Intel PEBS handling fixes and improvements, and other Intel
     PMU improvements (Kan Liang)

   - Various perf core improvements and optimizations (Peter Zijlstra)

   - ... misc cleanups, fixes and updates.

  There's over 200 tooling commits, here's an (imperfect) list of
  highlights:

   - 'perf annotate' improvements:

      * Recognize and handle jumps to other functions as calls, which
        improves the navigation along jumps and back. (Arnaldo Carvalho
        de Melo)

      * Add the 'P' hotkey in TUI annotation to dump annotation output
        into a file, to ease e-mail reporting of annotation details.
        (Arnaldo Carvalho de Melo)

      * Add an IPC/cycles column to the TUI (Jin Yao)

      * Improve s390 assembly annotation (Thomas Richter)

      * Refactor the output formatting logic to better separate it into
        interactive and non-interactive features and add the --stdio2
        output variant to demonstrate this. (Arnaldo Carvalho de Melo)

   - 'perf script' improvements:

      * Add Python 3 support (Jaroslav Škarvada)

      * Add --show-round-event (Jiri Olsa)

   - 'perf c2c' improvements:

      * Add NUMA analysis support (Jiri Olsa)

   - 'perf trace' improvements:

      * Improve PowerPC support (Ravi Bangoria)

   - 'perf inject' improvements:

      * Integrate ARM CoreSight traces (Robert Walker)

   - 'perf stat' improvements:

      * Add the --interval-count option (yuzhoujian)

      * Add the --timeout option (yuzhoujian)

   - 'perf sched' improvements (Changbin Du)

   - Vendor events improvements :

      * Add IBM s390 vendor events (Thomas Richter)

      * Add and improve arm64 vendor events (John Garry, Ganapatrao
        Kulkarni)

      * Update POWER9 vendor events (Sukadev Bhattiprolu)

   - Intel PT tooling improvements (Adrian Hunter)

   - PMU handling improvements (Agustin Vega-Frias)

   - Record machine topology in perf.data (Jiri Olsa)

   - Various overwrite related cleanups (Kan Liang)

   - Add arm64 dwarf post unwind support (Kim Phillips, Jean Pihet)

   - ... and lots of other changes, cleanups and fixes, see the shortlog
     and Git history for details"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (262 commits)
  perf/x86/intel: Enable C-state residency events for Cannon Lake
  perf/x86/intel: Add Cannon Lake support for RAPL profiling
  perf/x86/pt, coresight: Clean up address filter structure
  perf vendor events s390: Add JSON files for IBM z14
  perf vendor events s390: Add JSON files for IBM z13
  perf vendor events s390: Add JSON files for IBM zEC12 zBC12
  perf vendor events s390: Add JSON files for IBM z196
  perf vendor events s390: Add JSON files for IBM z10EC z10BC
  perf mmap: Be consistent when checking for an unmaped ring buffer
  perf mmap: Fix accessing unmapped mmap in perf_mmap__read_done()
  perf build: Fix check-headers.sh opts assignment
  perf/x86: Update rdpmc_always_available static key to the modern API
  perf annotate: Use absolute addresses to calculate jump target offsets
  perf annotate: Defer searching for comma in raw line till it is needed
  perf annotate: Support jumping from one function to another
  perf annotate: Add "_local" to jump/offset validation routines
  perf python: Reference Py_None before returning it
  perf annotate: Mark jumps to outher functions with the call arrow
  perf annotate: Pass function descriptor to its instruction parsing routines
  perf annotate: No need to calculate notes->start twice
  ...
This commit is contained in:
Linus Torvalds
2018-04-02 11:06:34 -07:00
213 changed files with 9440 additions and 2237 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -44,6 +44,7 @@
#include <linux/list.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/bug.h>
#include <linux/hw_breakpoint.h>
/*
@@ -85,9 +86,9 @@ __weak int hw_breakpoint_weight(struct perf_event *bp)
return 1;
}
static inline enum bp_type_idx find_slot_idx(struct perf_event *bp)
static inline enum bp_type_idx find_slot_idx(u64 bp_type)
{
if (bp->attr.bp_type & HW_BREAKPOINT_RW)
if (bp_type & HW_BREAKPOINT_RW)
return TYPE_DATA;
return TYPE_INST;
@@ -122,7 +123,7 @@ static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type)
list_for_each_entry(iter, &bp_task_head, hw.bp_list) {
if (iter->hw.target == tsk &&
find_slot_idx(iter) == type &&
find_slot_idx(iter->attr.bp_type) == type &&
(iter->cpu < 0 || cpu == iter->cpu))
count += hw_breakpoint_weight(iter);
}
@@ -277,7 +278,7 @@ __weak void arch_unregister_hw_breakpoint(struct perf_event *bp)
* ((per_cpu(info->flexible, *) > 1) + max(per_cpu(info->cpu_pinned, *))
* + max(per_cpu(info->tsk_pinned, *))) < HBP_NUM
*/
static int __reserve_bp_slot(struct perf_event *bp)
static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type)
{
struct bp_busy_slots slots = {0};
enum bp_type_idx type;
@@ -288,11 +289,11 @@ static int __reserve_bp_slot(struct perf_event *bp)
return -ENOMEM;
/* Basic checks */
if (bp->attr.bp_type == HW_BREAKPOINT_EMPTY ||
bp->attr.bp_type == HW_BREAKPOINT_INVALID)
if (bp_type == HW_BREAKPOINT_EMPTY ||
bp_type == HW_BREAKPOINT_INVALID)
return -EINVAL;
type = find_slot_idx(bp);
type = find_slot_idx(bp_type);
weight = hw_breakpoint_weight(bp);
fetch_bp_busy_slots(&slots, bp, type);
@@ -317,19 +318,19 @@ int reserve_bp_slot(struct perf_event *bp)
mutex_lock(&nr_bp_mutex);
ret = __reserve_bp_slot(bp);
ret = __reserve_bp_slot(bp, bp->attr.bp_type);
mutex_unlock(&nr_bp_mutex);
return ret;
}
static void __release_bp_slot(struct perf_event *bp)
static void __release_bp_slot(struct perf_event *bp, u64 bp_type)
{
enum bp_type_idx type;
int weight;
type = find_slot_idx(bp);
type = find_slot_idx(bp_type);
weight = hw_breakpoint_weight(bp);
toggle_bp_slot(bp, false, type, weight);
}
@@ -339,11 +340,43 @@ void release_bp_slot(struct perf_event *bp)
mutex_lock(&nr_bp_mutex);
arch_unregister_hw_breakpoint(bp);
__release_bp_slot(bp);
__release_bp_slot(bp, bp->attr.bp_type);
mutex_unlock(&nr_bp_mutex);
}
static int __modify_bp_slot(struct perf_event *bp, u64 old_type)
{
int err;
__release_bp_slot(bp, old_type);
err = __reserve_bp_slot(bp, bp->attr.bp_type);
if (err) {
/*
* Reserve the old_type slot back in case
* there's no space for the new type.
*
* This must succeed, because we just released
* the old_type slot in the __release_bp_slot
* call above. If not, something is broken.
*/
WARN_ON(__reserve_bp_slot(bp, old_type));
}
return err;
}
static int modify_bp_slot(struct perf_event *bp, u64 old_type)
{
int ret;
mutex_lock(&nr_bp_mutex);
ret = __modify_bp_slot(bp, old_type);
mutex_unlock(&nr_bp_mutex);
return ret;
}
/*
* Allow the kernel debugger to reserve breakpoint slots without
* taking a lock using the dbg_* variant of for the reserve and
@@ -354,7 +387,7 @@ int dbg_reserve_bp_slot(struct perf_event *bp)
if (mutex_is_locked(&nr_bp_mutex))
return -1;
return __reserve_bp_slot(bp);
return __reserve_bp_slot(bp, bp->attr.bp_type);
}
int dbg_release_bp_slot(struct perf_event *bp)
@@ -362,7 +395,7 @@ int dbg_release_bp_slot(struct perf_event *bp)
if (mutex_is_locked(&nr_bp_mutex))
return -1;
__release_bp_slot(bp);
__release_bp_slot(bp, bp->attr.bp_type);
return 0;
}
@@ -423,6 +456,38 @@ register_user_hw_breakpoint(struct perf_event_attr *attr,
}
EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
int
modify_user_hw_breakpoint_check(struct perf_event *bp, struct perf_event_attr *attr,
bool check)
{
u64 old_addr = bp->attr.bp_addr;
u64 old_len = bp->attr.bp_len;
int old_type = bp->attr.bp_type;
bool modify = attr->bp_type != old_type;
int err = 0;
bp->attr.bp_addr = attr->bp_addr;
bp->attr.bp_type = attr->bp_type;
bp->attr.bp_len = attr->bp_len;
if (check && memcmp(&bp->attr, attr, sizeof(*attr)))
return -EINVAL;
err = validate_hw_breakpoint(bp);
if (!err && modify)
err = modify_bp_slot(bp, old_type);
if (err) {
bp->attr.bp_addr = old_addr;
bp->attr.bp_type = old_type;
bp->attr.bp_len = old_len;
return err;
}
bp->attr.disabled = attr->disabled;
return 0;
}
/**
* modify_user_hw_breakpoint - modify a user-space hardware breakpoint
* @bp: the breakpoint structure to modify
@@ -441,21 +506,14 @@ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *att
else
perf_event_disable(bp);
bp->attr.bp_addr = attr->bp_addr;
bp->attr.bp_type = attr->bp_type;
bp->attr.bp_len = attr->bp_len;
bp->attr.disabled = 1;
if (!attr->disabled) {
int err = validate_hw_breakpoint(bp);
int err = modify_user_hw_breakpoint_check(bp, attr, false);
if (err)
return err;
perf_event_enable(bp);
bp->attr.disabled = 0;
}
return 0;
}
EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);

View File

@@ -8,6 +8,7 @@
#include <linux/module.h>
#include <linux/kprobes.h>
#include "trace.h"
#include "trace_probe.h"
static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];
@@ -237,6 +238,107 @@ void perf_trace_destroy(struct perf_event *p_event)
mutex_unlock(&event_mutex);
}
#ifdef CONFIG_KPROBE_EVENTS
int perf_kprobe_init(struct perf_event *p_event, bool is_retprobe)
{
int ret;
char *func = NULL;
struct trace_event_call *tp_event;
if (p_event->attr.kprobe_func) {
func = kzalloc(KSYM_NAME_LEN, GFP_KERNEL);
if (!func)
return -ENOMEM;
ret = strncpy_from_user(
func, u64_to_user_ptr(p_event->attr.kprobe_func),
KSYM_NAME_LEN);
if (ret < 0)
goto out;
if (func[0] == '\0') {
kfree(func);
func = NULL;
}
}
tp_event = create_local_trace_kprobe(
func, (void *)(unsigned long)(p_event->attr.kprobe_addr),
p_event->attr.probe_offset, is_retprobe);
if (IS_ERR(tp_event)) {
ret = PTR_ERR(tp_event);
goto out;
}
ret = perf_trace_event_init(tp_event, p_event);
if (ret)
destroy_local_trace_kprobe(tp_event);
out:
kfree(func);
return ret;
}
void perf_kprobe_destroy(struct perf_event *p_event)
{
perf_trace_event_close(p_event);
perf_trace_event_unreg(p_event);
destroy_local_trace_kprobe(p_event->tp_event);
}
#endif /* CONFIG_KPROBE_EVENTS */
#ifdef CONFIG_UPROBE_EVENTS
int perf_uprobe_init(struct perf_event *p_event, bool is_retprobe)
{
int ret;
char *path = NULL;
struct trace_event_call *tp_event;
if (!p_event->attr.uprobe_path)
return -EINVAL;
path = kzalloc(PATH_MAX, GFP_KERNEL);
if (!path)
return -ENOMEM;
ret = strncpy_from_user(
path, u64_to_user_ptr(p_event->attr.uprobe_path), PATH_MAX);
if (ret < 0)
goto out;
if (path[0] == '\0') {
ret = -EINVAL;
goto out;
}
tp_event = create_local_trace_uprobe(
path, p_event->attr.probe_offset, is_retprobe);
if (IS_ERR(tp_event)) {
ret = PTR_ERR(tp_event);
goto out;
}
/*
* local trace_uprobe need to hold event_mutex to call
* uprobe_buffer_enable() and uprobe_buffer_disable().
* event_mutex is not required for local trace_kprobes.
*/
mutex_lock(&event_mutex);
ret = perf_trace_event_init(tp_event, p_event);
if (ret)
destroy_local_trace_uprobe(tp_event);
mutex_unlock(&event_mutex);
out:
kfree(path);
return ret;
}
void perf_uprobe_destroy(struct perf_event *p_event)
{
mutex_lock(&event_mutex);
perf_trace_event_close(p_event);
perf_trace_event_unreg(p_event);
mutex_unlock(&event_mutex);
destroy_local_trace_uprobe(p_event->tp_event);
}
#endif /* CONFIG_UPROBE_EVENTS */
int perf_trace_add(struct perf_event *p_event, int flags)
{
struct trace_event_call *tp_event = p_event->tp_event;

View File

@@ -462,6 +462,14 @@ disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
disable_kprobe(&tk->rp.kp);
wait = 1;
}
/*
* if tk is not added to any list, it must be a local trace_kprobe
* created with perf_event_open. We don't need to wait for these
* trace_kprobes
*/
if (list_empty(&tk->list))
wait = 0;
out:
if (wait) {
/*
@@ -1358,12 +1366,9 @@ static struct trace_event_functions kprobe_funcs = {
.trace = print_kprobe_event
};
static int register_kprobe_event(struct trace_kprobe *tk)
static inline void init_trace_event_call(struct trace_kprobe *tk,
struct trace_event_call *call)
{
struct trace_event_call *call = &tk->tp.call;
int ret;
/* Initialize trace_event_call */
INIT_LIST_HEAD(&call->class->fields);
if (trace_kprobe_is_return(tk)) {
call->event.funcs = &kretprobe_funcs;
@@ -1372,6 +1377,19 @@ static int register_kprobe_event(struct trace_kprobe *tk)
call->event.funcs = &kprobe_funcs;
call->class->define_fields = kprobe_event_define_fields;
}
call->flags = TRACE_EVENT_FL_KPROBE;
call->class->reg = kprobe_register;
call->data = tk;
}
static int register_kprobe_event(struct trace_kprobe *tk)
{
struct trace_event_call *call = &tk->tp.call;
int ret = 0;
init_trace_event_call(tk, call);
if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0)
return -ENOMEM;
ret = register_trace_event(&call->event);
@@ -1379,9 +1397,6 @@ static int register_kprobe_event(struct trace_kprobe *tk)
kfree(call->print_fmt);
return -ENODEV;
}
call->flags = TRACE_EVENT_FL_KPROBE;
call->class->reg = kprobe_register;
call->data = tk;
ret = trace_add_event_call(call);
if (ret) {
pr_info("Failed to register kprobe event: %s\n",
@@ -1403,6 +1418,66 @@ static int unregister_kprobe_event(struct trace_kprobe *tk)
return ret;
}
#ifdef CONFIG_PERF_EVENTS
/* create a trace_kprobe, but don't add it to global lists */
struct trace_event_call *
create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
bool is_return)
{
struct trace_kprobe *tk;
int ret;
char *event;
/*
* local trace_kprobes are not added to probe_list, so they are never
* searched in find_trace_kprobe(). Therefore, there is no concern of
* duplicated name here.
*/
event = func ? func : "DUMMY_EVENT";
tk = alloc_trace_kprobe(KPROBE_EVENT_SYSTEM, event, (void *)addr, func,
offs, 0 /* maxactive */, 0 /* nargs */,
is_return);
if (IS_ERR(tk)) {
pr_info("Failed to allocate trace_probe.(%d)\n",
(int)PTR_ERR(tk));
return ERR_CAST(tk);
}
init_trace_event_call(tk, &tk->tp.call);
if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0) {
ret = -ENOMEM;
goto error;
}
ret = __register_trace_kprobe(tk);
if (ret < 0)
goto error;
return &tk->tp.call;
error:
free_trace_kprobe(tk);
return ERR_PTR(ret);
}
void destroy_local_trace_kprobe(struct trace_event_call *event_call)
{
struct trace_kprobe *tk;
tk = container_of(event_call, struct trace_kprobe, tp.call);
if (trace_probe_is_enabled(&tk->tp)) {
WARN_ON(1);
return;
}
__unregister_trace_kprobe(tk);
free_trace_kprobe(tk);
}
#endif /* CONFIG_PERF_EVENTS */
/* Make a tracefs interface for controlling probe points */
static __init int init_kprobe_trace(void)
{

View File

@@ -416,3 +416,14 @@ store_trace_args(int ent_size, struct trace_probe *tp, struct pt_regs *regs,
}
extern int set_print_fmt(struct trace_probe *tp, bool is_return);
#ifdef CONFIG_PERF_EVENTS
extern struct trace_event_call *
create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
bool is_return);
extern void destroy_local_trace_kprobe(struct trace_event_call *event_call);
extern struct trace_event_call *
create_local_trace_uprobe(char *name, unsigned long offs, bool is_return);
extern void destroy_local_trace_uprobe(struct trace_event_call *event_call);
#endif

View File

@@ -1292,16 +1292,25 @@ static struct trace_event_functions uprobe_funcs = {
.trace = print_uprobe_event
};
static int register_uprobe_event(struct trace_uprobe *tu)
static inline void init_trace_event_call(struct trace_uprobe *tu,
struct trace_event_call *call)
{
struct trace_event_call *call = &tu->tp.call;
int ret;
/* Initialize trace_event_call */
INIT_LIST_HEAD(&call->class->fields);
call->event.funcs = &uprobe_funcs;
call->class->define_fields = uprobe_event_define_fields;
call->flags = TRACE_EVENT_FL_UPROBE;
call->class->reg = trace_uprobe_register;
call->data = tu;
}
static int register_uprobe_event(struct trace_uprobe *tu)
{
struct trace_event_call *call = &tu->tp.call;
int ret = 0;
init_trace_event_call(tu, call);
if (set_print_fmt(&tu->tp, is_ret_probe(tu)) < 0)
return -ENOMEM;
@@ -1311,9 +1320,6 @@ static int register_uprobe_event(struct trace_uprobe *tu)
return -ENODEV;
}
call->flags = TRACE_EVENT_FL_UPROBE;
call->class->reg = trace_uprobe_register;
call->data = tu;
ret = trace_add_event_call(call);
if (ret) {
@@ -1339,6 +1345,70 @@ static int unregister_uprobe_event(struct trace_uprobe *tu)
return 0;
}
#ifdef CONFIG_PERF_EVENTS
struct trace_event_call *
create_local_trace_uprobe(char *name, unsigned long offs, bool is_return)
{
struct trace_uprobe *tu;
struct inode *inode;
struct path path;
int ret;
ret = kern_path(name, LOOKUP_FOLLOW, &path);
if (ret)
return ERR_PTR(ret);
inode = igrab(d_inode(path.dentry));
path_put(&path);
if (!inode || !S_ISREG(inode->i_mode)) {
iput(inode);
return ERR_PTR(-EINVAL);
}
/*
* local trace_kprobes are not added to probe_list, so they are never
* searched in find_trace_kprobe(). Therefore, there is no concern of
* duplicated name "DUMMY_EVENT" here.
*/
tu = alloc_trace_uprobe(UPROBE_EVENT_SYSTEM, "DUMMY_EVENT", 0,
is_return);
if (IS_ERR(tu)) {
pr_info("Failed to allocate trace_uprobe.(%d)\n",
(int)PTR_ERR(tu));
return ERR_CAST(tu);
}
tu->offset = offs;
tu->inode = inode;
tu->filename = kstrdup(name, GFP_KERNEL);
init_trace_event_call(tu, &tu->tp.call);
if (set_print_fmt(&tu->tp, is_ret_probe(tu)) < 0) {
ret = -ENOMEM;
goto error;
}
return &tu->tp.call;
error:
free_trace_uprobe(tu);
return ERR_PTR(ret);
}
void destroy_local_trace_uprobe(struct trace_event_call *event_call)
{
struct trace_uprobe *tu;
tu = container_of(event_call, struct trace_uprobe, tp.call);
kfree(tu->tp.call.print_fmt);
tu->tp.call.print_fmt = NULL;
free_trace_uprobe(tu);
}
#endif /* CONFIG_PERF_EVENTS */
/* Make a trace interface for controling probe points */
static __init int init_uprobe_trace(void)
{