Merge tag 'perf-core-for-mingo-4.12-20170503' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

Fixes:

- Support setting probes in versioned user space symbols, such as
  pthread_create@@GLIBC_2.1, picking the default one. More work is
  needed to make it possible to set probes on the other versions, as
  the 'perf probe' syntax already uses @ for other purposes.
  (Paul Clarke)

- Do not special case address zero as an error for routines that
  return addresses (symbol lookup). Instead, use the return value as
  the success/error indication and pass a pointer through which the
  address is returned, fixing 'perf test vmlinux' (the one that
  compares addresses between vmlinux and kallsyms) on s/390, where
  the '_text' address is equal to zero (Arnaldo Carvalho de Melo)

Infrastructure changes:

- More header sanitization, moving stuff out of util.h into
  more appropriate headers and objects and sometimes creating
  new ones (Arnaldo Carvalho de Melo)

- Refactor duplicated code for obtaining the config file name (Taeung Song)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar
2017-05-03 19:28:27 +02:00
389 changed files with 8636 additions and 2472 deletions

View File

@@ -48,6 +48,8 @@
#include <linux/parser.h>
#include <linux/sched/clock.h>
#include <linux/sched/mm.h>
#include <linux/proc_ns.h>
#include <linux/mount.h>
#include "internal.h"
@@ -379,6 +381,7 @@ static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
static atomic_t nr_mmap_events __read_mostly;
static atomic_t nr_comm_events __read_mostly;
static atomic_t nr_namespaces_events __read_mostly;
static atomic_t nr_task_events __read_mostly;
static atomic_t nr_freq_events __read_mostly;
static atomic_t nr_switch_events __read_mostly;
@@ -3991,6 +3994,8 @@ static void unaccount_event(struct perf_event *event)
atomic_dec(&nr_mmap_events);
if (event->attr.comm)
atomic_dec(&nr_comm_events);
if (event->attr.namespaces)
atomic_dec(&nr_namespaces_events);
if (event->attr.task)
atomic_dec(&nr_task_events);
if (event->attr.freq)
@@ -6491,6 +6496,7 @@ static void perf_event_task(struct task_struct *task,
/*
 * Report @task to perf: first through perf_event_task(), then by emitting
 * a namespaces record for it through perf_event_namespaces().
 */
void perf_event_fork(struct task_struct *task)
{
/* NOTE(review): the NULL/1 arguments presumably mean "no task context, new task" - confirm against perf_event_task(). */
perf_event_task(task, NULL, 1);
/* Follow up with the namespaces record for the same task. */
perf_event_namespaces(task);
}
/*
@@ -6592,6 +6598,132 @@ void perf_event_comm(struct task_struct *task, bool exec)
perf_event_comm_event(&comm_event);
}
/*
 * namespaces tracking
 */
/*
 * Working state for one PERF_RECORD_NAMESPACES emission: the task being
 * described plus the record body (event_id) that
 * perf_event_namespaces_output() writes to each matching event.
 */
struct perf_namespaces_event {
/* Task whose pid/tid and namespaces are reported. */
struct task_struct *task;
/* Record payload; written out as a whole with perf_output_put(). */
struct {
struct perf_event_header header;
/* pid and tid are filled in per event by perf_event_namespaces_output(). */
u32 pid;
u32 tid;
/* Set to NR_NAMESPACES by perf_event_namespaces(). */
u64 nr_namespaces;
/* One (dev, ino) entry per namespace, indexed by the *_NS_INDEX constants. */
struct perf_ns_link_info link_info[NR_NAMESPACES];
} event_id;
};
static int perf_event_namespaces_match(struct perf_event *event)
{
return event->attr.namespaces;
}
static void perf_event_namespaces_output(struct perf_event *event,
void *data)
{
struct perf_namespaces_event *namespaces_event = data;
struct perf_output_handle handle;
struct perf_sample_data sample;
int ret;
if (!perf_event_namespaces_match(event))
return;
perf_event_header__init_id(&namespaces_event->event_id.header,
&sample, event);
ret = perf_output_begin(&handle, event,
namespaces_event->event_id.header.size);
if (ret)
return;
namespaces_event->event_id.pid = perf_event_pid(event,
namespaces_event->task);
namespaces_event->event_id.tid = perf_event_tid(event,
namespaces_event->task);
perf_output_put(&handle, namespaces_event->event_id);
perf_event__output_id_sample(event, &handle, &sample);
perf_output_end(&handle);
}
/*
 * Fill one link_info slot with the device and inode number of the
 * namespace of @task selected by @ns_ops.
 *
 * @ns_link_info: slot to fill; left untouched if the lookup fails.
 * @task:         task whose namespace is looked up.
 * @ns_ops:       operations table identifying the namespace type.
 */
static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info,
				   struct task_struct *task,
				   const struct proc_ns_operations *ns_ops)
{
	struct path ns_path;
	struct inode *ns_inode;
	void *error;

	/* A NULL return is treated as success; anything else as failure. */
	error = ns_get_path(&ns_path, task, ns_ops);
	if (!error) {
		ns_inode = ns_path.dentry->d_inode;
		ns_link_info->dev = new_encode_dev(ns_inode->i_sb->s_dev);
		ns_link_info->ino = ns_inode->i_ino;
		/*
		 * ns_get_path() hands back a referenced path; drop the
		 * references now that dev/ino have been copied out,
		 * otherwise every namespaces record leaks them.
		 */
		path_put(&ns_path);
	}
}
void perf_event_namespaces(struct task_struct *task)
{
struct perf_namespaces_event namespaces_event;
struct perf_ns_link_info *ns_link_info;
if (!atomic_read(&nr_namespaces_events))
return;
namespaces_event = (struct perf_namespaces_event){
.task = task,
.event_id = {
.header = {
.type = PERF_RECORD_NAMESPACES,
.misc = 0,
.size = sizeof(namespaces_event.event_id),
},
/* .pid */
/* .tid */
.nr_namespaces = NR_NAMESPACES,
/* .link_info[NR_NAMESPACES] */
},
};
ns_link_info = namespaces_event.event_id.link_info;
perf_fill_ns_link_info(&ns_link_info[MNT_NS_INDEX],
task, &mntns_operations);
#ifdef CONFIG_USER_NS
perf_fill_ns_link_info(&ns_link_info[USER_NS_INDEX],
task, &userns_operations);
#endif
#ifdef CONFIG_NET_NS
perf_fill_ns_link_info(&ns_link_info[NET_NS_INDEX],
task, &netns_operations);
#endif
#ifdef CONFIG_UTS_NS
perf_fill_ns_link_info(&ns_link_info[UTS_NS_INDEX],
task, &utsns_operations);
#endif
#ifdef CONFIG_IPC_NS
perf_fill_ns_link_info(&ns_link_info[IPC_NS_INDEX],
task, &ipcns_operations);
#endif
#ifdef CONFIG_PID_NS
perf_fill_ns_link_info(&ns_link_info[PID_NS_INDEX],
task, &pidns_operations);
#endif
#ifdef CONFIG_CGROUPS
perf_fill_ns_link_info(&ns_link_info[CGROUP_NS_INDEX],
task, &cgroupns_operations);
#endif
perf_iterate_sb(perf_event_namespaces_output,
&namespaces_event,
NULL);
}
/*
* mmap tracking
*/
@@ -9146,6 +9278,8 @@ static void account_event(struct perf_event *event)
atomic_inc(&nr_mmap_events);
if (event->attr.comm)
atomic_inc(&nr_comm_events);
if (event->attr.namespaces)
atomic_inc(&nr_namespaces_events);
if (event->attr.task)
atomic_inc(&nr_task_events);
if (event->attr.freq)
@@ -9691,6 +9825,11 @@ SYSCALL_DEFINE5(perf_event_open,
return -EACCES;
}
if (attr.namespaces) {
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
}
if (attr.freq) {
if (attr.sample_freq > sysctl_perf_event_sample_rate)
return -EINVAL;

View File

@@ -297,6 +297,19 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
rb->paused = 1;
}
/*
 * OR @flags into the AUX flags accumulated on @handle.
 *
 * A request containing PERF_AUX_FLAG_OVERWRITE triggers a one-time
 * warning and is ignored in its entirety.
 */
void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags)
{
	/*
	 * OVERWRITE is determined by perf_aux_output_end() and can't
	 * be passed in directly.
	 */
	if (!WARN_ON_ONCE(flags & PERF_AUX_FLAG_OVERWRITE))
		handle->aux_flags |= flags;
}
EXPORT_SYMBOL_GPL(perf_aux_output_flag);
/*
* This is called before hardware starts writing to the AUX area to
* obtain an output handle and make sure there's room in the buffer.
@@ -360,6 +373,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
handle->event = event;
handle->head = aux_head;
handle->size = 0;
handle->aux_flags = 0;
/*
* In overwrite mode, AUX data stores do not depend on aux_tail,
@@ -408,34 +422,32 @@ err:
* of the AUX buffer management code is that after pmu::stop(), the AUX
* transaction must be stopped and therefore drop the AUX reference count.
*/
void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
bool truncated)
void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
{
bool wakeup = !!(handle->aux_flags & PERF_AUX_FLAG_TRUNCATED);
struct ring_buffer *rb = handle->rb;
bool wakeup = truncated;
unsigned long aux_head;
u64 flags = 0;
if (truncated)
flags |= PERF_AUX_FLAG_TRUNCATED;
/* in overwrite mode, driver provides aux_head via handle */
if (rb->aux_overwrite) {
flags |= PERF_AUX_FLAG_OVERWRITE;
handle->aux_flags |= PERF_AUX_FLAG_OVERWRITE;
aux_head = handle->head;
local_set(&rb->aux_head, aux_head);
} else {
handle->aux_flags &= ~PERF_AUX_FLAG_OVERWRITE;
aux_head = local_read(&rb->aux_head);
local_add(size, &rb->aux_head);
}
if (size || flags) {
if (size || handle->aux_flags) {
/*
* Only send RECORD_AUX if we have something useful to communicate
*/
perf_event_aux_event(handle->event, aux_head, size, flags);
perf_event_aux_event(handle->event, aux_head, size,
handle->aux_flags);
}
aux_head = rb->user_page->aux_head = local_read(&rb->aux_head);
@@ -446,7 +458,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
}
if (wakeup) {
if (truncated)
if (handle->aux_flags & PERF_AUX_FLAG_TRUNCATED)
handle->event->pending_disable = 1;
perf_output_wakeup(handle);
}

View File

@@ -2352,6 +2352,8 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
}
}
perf_event_namespaces(current);
bad_unshare_cleanup_cred:
if (new_cred)
put_cred(new_cred);

View File

@@ -1391,21 +1391,19 @@ bool within_kprobe_blacklist(unsigned long addr)
* This returns encoded errors if it fails to look up symbol or invalid
* combination of parameters.
*/
static kprobe_opcode_t *kprobe_addr(struct kprobe *p)
static kprobe_opcode_t *_kprobe_addr(kprobe_opcode_t *addr,
const char *symbol_name, unsigned int offset)
{
kprobe_opcode_t *addr = p->addr;
if ((p->symbol_name && p->addr) ||
(!p->symbol_name && !p->addr))
if ((symbol_name && addr) || (!symbol_name && !addr))
goto invalid;
if (p->symbol_name) {
kprobe_lookup_name(p->symbol_name, addr);
if (symbol_name) {
kprobe_lookup_name(symbol_name, addr);
if (!addr)
return ERR_PTR(-ENOENT);
}
addr = (kprobe_opcode_t *)(((char *)addr) + p->offset);
addr = (kprobe_opcode_t *)(((char *)addr) + offset);
if (addr)
return addr;
@@ -1413,6 +1411,11 @@ invalid:
return ERR_PTR(-EINVAL);
}
/*
 * Resolve the probe address described by @p by forwarding its addr,
 * symbol_name and offset fields to _kprobe_addr(); the result is passed
 * back unchanged.
 */
static kprobe_opcode_t *kprobe_addr(struct kprobe *p)
{
	kprobe_opcode_t *resolved;

	resolved = _kprobe_addr(p->addr, p->symbol_name, p->offset);

	return resolved;
}
/* Check passed kprobe is valid and return kprobe in kprobe_table. */
static struct kprobe *__get_valid_kprobe(struct kprobe *p)
{
@@ -1740,11 +1743,12 @@ void unregister_kprobes(struct kprobe **kps, int num)
}
EXPORT_SYMBOL_GPL(unregister_kprobes);
int __weak __kprobes kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data)
int __weak kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data)
{
return NOTIFY_DONE;
}
NOKPROBE_SYMBOL(kprobe_exceptions_notify);
static struct notifier_block kprobe_exceptions_nb = {
.notifier_call = kprobe_exceptions_notify,
@@ -1875,6 +1879,25 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
}
NOKPROBE_SYMBOL(pre_handler_kretprobe);
/*
 * Weak default for the "is this offset still at function entry?" check:
 * only an offset of exactly zero qualifies. Being __weak, another
 * definition may replace it at link time.
 */
bool __weak arch_function_offset_within_entry(unsigned long offset)
{
	return offset == 0;
}
/*
 * Decide whether the probe location given by (@addr | @sym) + @offset
 * counts as a function entry.
 *
 * Returns false when _kprobe_addr() reports an error, when
 * kallsyms_lookup_size_offset() returns zero for the resolved address,
 * or when arch_function_offset_within_entry() rejects the offset that
 * the lookup stored; returns true otherwise.
 */
bool function_offset_within_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset)
{
	unsigned long sym_offset = offset;
	kprobe_opcode_t *probe_addr;

	probe_addr = _kprobe_addr(addr, sym, offset);
	if (IS_ERR(probe_addr))
		return false;

	/* NOTE(review): presumably stores the offset from the symbol start - confirm. */
	if (!kallsyms_lookup_size_offset((unsigned long)probe_addr, NULL,
					 &sym_offset))
		return false;

	return arch_function_offset_within_entry(sym_offset);
}
int register_kretprobe(struct kretprobe *rp)
{
int ret = 0;
@@ -1882,6 +1905,9 @@ int register_kretprobe(struct kretprobe *rp)
int i;
void *addr;
if (!function_offset_within_entry(rp->kp.addr, rp->kp.symbol_name, rp->kp.offset))
return -EINVAL;
if (kretprobe_blacklist_size) {
addr = kprobe_addr(&rp->kp);
if (IS_ERR(addr))

View File

@@ -26,6 +26,7 @@
#include <linux/file.h>
#include <linux/syscalls.h>
#include <linux/cgroup.h>
#include <linux/perf_event.h>
static struct kmem_cache *nsproxy_cachep;
@@ -262,6 +263,8 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype)
goto out;
}
switch_task_namespaces(tsk, new_nsproxy);
perf_event_namespaces(tsk);
out:
fput(file);
return err;

View File

@@ -455,7 +455,7 @@ config UPROBE_EVENTS
select UPROBES
select PROBE_EVENTS
select TRACING
default n
default y
help
This allows the user to add tracing events on top of userspace
dynamic events (similar to tracepoints) on the fly via the trace

View File

@@ -4355,6 +4355,7 @@ static const char readme_msg[] =
"\t -:[<group>/]<event>\n"
#ifdef CONFIG_KPROBE_EVENTS
"\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
"place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
#endif
#ifdef CONFIG_UPROBE_EVENTS
"\t place: <path>:<offset>\n"

View File

@@ -681,10 +681,6 @@ static int create_trace_kprobe(int argc, char **argv)
return -EINVAL;
}
if (isdigit(argv[1][0])) {
if (is_return) {
pr_info("Return probe point must be a symbol.\n");
return -EINVAL;
}
/* an address specified */
ret = kstrtoul(&argv[1][0], 0, (unsigned long *)&addr);
if (ret) {
@@ -700,8 +696,9 @@ static int create_trace_kprobe(int argc, char **argv)
pr_info("Failed to parse symbol.\n");
return ret;
}
if (offset && is_return) {
pr_info("Return probe must be used without offset.\n");
if (offset && is_return &&
!function_offset_within_entry(NULL, symbol, offset)) {
pr_info("Given offset is not valid for return probe.\n");
return -EINVAL;
}
}