Merge tag 'perf-core-for-mingo-4.12-20170503' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

Fixes:

- Support setting probes in versioned user space symbols, such as pthread_create@@GLIBC_2.1, picking the default one. More work is needed to make it possible to set probes on the other versions, as the 'perf probe' syntax already uses @ for other purposes. (Paul Clarke)

- Do not special-case address zero as an error in routines that return addresses (symbol lookup); instead use the return value as the success/error indication and pass a pointer through which the address is returned. This fixes 'perf test vmlinux' (the test that compares addresses between vmlinux and kallsyms) on s/390, where the '_text' address is equal to zero. (Arnaldo Carvalho de Melo)

Infrastructure changes:

- More header sanitization, moving stuff out of util.h into more appropriate headers and objects, and sometimes creating new ones. (Arnaldo Carvalho de Melo)

- Refactor duplicated code for obtaining the config file name. (Taeung Song)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
@@ -48,6 +48,8 @@
|
||||
#include <linux/parser.h>
|
||||
#include <linux/sched/clock.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/proc_ns.h>
|
||||
#include <linux/mount.h>
|
||||
|
||||
#include "internal.h"
|
||||
|
||||
@@ -379,6 +381,7 @@ static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
|
||||
|
||||
static atomic_t nr_mmap_events __read_mostly;
|
||||
static atomic_t nr_comm_events __read_mostly;
|
||||
static atomic_t nr_namespaces_events __read_mostly;
|
||||
static atomic_t nr_task_events __read_mostly;
|
||||
static atomic_t nr_freq_events __read_mostly;
|
||||
static atomic_t nr_switch_events __read_mostly;
|
||||
@@ -3991,6 +3994,8 @@ static void unaccount_event(struct perf_event *event)
|
||||
atomic_dec(&nr_mmap_events);
|
||||
if (event->attr.comm)
|
||||
atomic_dec(&nr_comm_events);
|
||||
if (event->attr.namespaces)
|
||||
atomic_dec(&nr_namespaces_events);
|
||||
if (event->attr.task)
|
||||
atomic_dec(&nr_task_events);
|
||||
if (event->attr.freq)
|
||||
@@ -6491,6 +6496,7 @@ static void perf_event_task(struct task_struct *task,
|
||||
void perf_event_fork(struct task_struct *task)
|
||||
{
|
||||
perf_event_task(task, NULL, 1);
|
||||
perf_event_namespaces(task);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -6592,6 +6598,132 @@ void perf_event_comm(struct task_struct *task, bool exec)
|
||||
perf_event_comm_event(&comm_event);
|
||||
}
|
||||
|
||||
/*
|
||||
* namespaces tracking
|
||||
*/
|
||||
|
||||
struct perf_namespaces_event {
|
||||
struct task_struct *task;
|
||||
|
||||
struct {
|
||||
struct perf_event_header header;
|
||||
|
||||
u32 pid;
|
||||
u32 tid;
|
||||
u64 nr_namespaces;
|
||||
struct perf_ns_link_info link_info[NR_NAMESPACES];
|
||||
} event_id;
|
||||
};
|
||||
|
||||
static int perf_event_namespaces_match(struct perf_event *event)
|
||||
{
|
||||
return event->attr.namespaces;
|
||||
}
|
||||
|
||||
static void perf_event_namespaces_output(struct perf_event *event,
|
||||
void *data)
|
||||
{
|
||||
struct perf_namespaces_event *namespaces_event = data;
|
||||
struct perf_output_handle handle;
|
||||
struct perf_sample_data sample;
|
||||
int ret;
|
||||
|
||||
if (!perf_event_namespaces_match(event))
|
||||
return;
|
||||
|
||||
perf_event_header__init_id(&namespaces_event->event_id.header,
|
||||
&sample, event);
|
||||
ret = perf_output_begin(&handle, event,
|
||||
namespaces_event->event_id.header.size);
|
||||
if (ret)
|
||||
return;
|
||||
|
||||
namespaces_event->event_id.pid = perf_event_pid(event,
|
||||
namespaces_event->task);
|
||||
namespaces_event->event_id.tid = perf_event_tid(event,
|
||||
namespaces_event->task);
|
||||
|
||||
perf_output_put(&handle, namespaces_event->event_id);
|
||||
|
||||
perf_event__output_id_sample(event, &handle, &sample);
|
||||
|
||||
perf_output_end(&handle);
|
||||
}
|
||||
|
||||
static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info,
|
||||
struct task_struct *task,
|
||||
const struct proc_ns_operations *ns_ops)
|
||||
{
|
||||
struct path ns_path;
|
||||
struct inode *ns_inode;
|
||||
void *error;
|
||||
|
||||
error = ns_get_path(&ns_path, task, ns_ops);
|
||||
if (!error) {
|
||||
ns_inode = ns_path.dentry->d_inode;
|
||||
ns_link_info->dev = new_encode_dev(ns_inode->i_sb->s_dev);
|
||||
ns_link_info->ino = ns_inode->i_ino;
|
||||
}
|
||||
}
|
||||
|
||||
void perf_event_namespaces(struct task_struct *task)
|
||||
{
|
||||
struct perf_namespaces_event namespaces_event;
|
||||
struct perf_ns_link_info *ns_link_info;
|
||||
|
||||
if (!atomic_read(&nr_namespaces_events))
|
||||
return;
|
||||
|
||||
namespaces_event = (struct perf_namespaces_event){
|
||||
.task = task,
|
||||
.event_id = {
|
||||
.header = {
|
||||
.type = PERF_RECORD_NAMESPACES,
|
||||
.misc = 0,
|
||||
.size = sizeof(namespaces_event.event_id),
|
||||
},
|
||||
/* .pid */
|
||||
/* .tid */
|
||||
.nr_namespaces = NR_NAMESPACES,
|
||||
/* .link_info[NR_NAMESPACES] */
|
||||
},
|
||||
};
|
||||
|
||||
ns_link_info = namespaces_event.event_id.link_info;
|
||||
|
||||
perf_fill_ns_link_info(&ns_link_info[MNT_NS_INDEX],
|
||||
task, &mntns_operations);
|
||||
|
||||
#ifdef CONFIG_USER_NS
|
||||
perf_fill_ns_link_info(&ns_link_info[USER_NS_INDEX],
|
||||
task, &userns_operations);
|
||||
#endif
|
||||
#ifdef CONFIG_NET_NS
|
||||
perf_fill_ns_link_info(&ns_link_info[NET_NS_INDEX],
|
||||
task, &netns_operations);
|
||||
#endif
|
||||
#ifdef CONFIG_UTS_NS
|
||||
perf_fill_ns_link_info(&ns_link_info[UTS_NS_INDEX],
|
||||
task, &utsns_operations);
|
||||
#endif
|
||||
#ifdef CONFIG_IPC_NS
|
||||
perf_fill_ns_link_info(&ns_link_info[IPC_NS_INDEX],
|
||||
task, &ipcns_operations);
|
||||
#endif
|
||||
#ifdef CONFIG_PID_NS
|
||||
perf_fill_ns_link_info(&ns_link_info[PID_NS_INDEX],
|
||||
task, &pidns_operations);
|
||||
#endif
|
||||
#ifdef CONFIG_CGROUPS
|
||||
perf_fill_ns_link_info(&ns_link_info[CGROUP_NS_INDEX],
|
||||
task, &cgroupns_operations);
|
||||
#endif
|
||||
|
||||
perf_iterate_sb(perf_event_namespaces_output,
|
||||
&namespaces_event,
|
||||
NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* mmap tracking
|
||||
*/
|
||||
@@ -9146,6 +9278,8 @@ static void account_event(struct perf_event *event)
|
||||
atomic_inc(&nr_mmap_events);
|
||||
if (event->attr.comm)
|
||||
atomic_inc(&nr_comm_events);
|
||||
if (event->attr.namespaces)
|
||||
atomic_inc(&nr_namespaces_events);
|
||||
if (event->attr.task)
|
||||
atomic_inc(&nr_task_events);
|
||||
if (event->attr.freq)
|
||||
@@ -9691,6 +9825,11 @@ SYSCALL_DEFINE5(perf_event_open,
|
||||
return -EACCES;
|
||||
}
|
||||
|
||||
if (attr.namespaces) {
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EACCES;
|
||||
}
|
||||
|
||||
if (attr.freq) {
|
||||
if (attr.sample_freq > sysctl_perf_event_sample_rate)
|
||||
return -EINVAL;
|
||||
|
@@ -297,6 +297,19 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
|
||||
rb->paused = 1;
|
||||
}
|
||||
|
||||
/*
 * perf_aux_output_flag - accumulate AUX record flags on an output handle.
 *
 * @handle: AUX output handle whose aux_flags are updated.
 * @flags:  flags to OR into the handle.
 *
 * PERF_AUX_FLAG_OVERWRITE is rejected (with a one-time warning, and no
 * flags applied at all): that flag is decided by perf_aux_output_end()
 * and must not be supplied by the caller.
 */
void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags)
{
	if (!WARN_ON_ONCE(flags & PERF_AUX_FLAG_OVERWRITE))
		handle->aux_flags |= flags;
}
EXPORT_SYMBOL_GPL(perf_aux_output_flag);
|
||||
|
||||
/*
|
||||
* This is called before hardware starts writing to the AUX area to
|
||||
* obtain an output handle and make sure there's room in the buffer.
|
||||
@@ -360,6 +373,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
|
||||
handle->event = event;
|
||||
handle->head = aux_head;
|
||||
handle->size = 0;
|
||||
handle->aux_flags = 0;
|
||||
|
||||
/*
|
||||
* In overwrite mode, AUX data stores do not depend on aux_tail,
|
||||
@@ -408,34 +422,32 @@ err:
|
||||
* of the AUX buffer management code is that after pmu::stop(), the AUX
|
||||
* transaction must be stopped and therefore drop the AUX reference count.
|
||||
*/
|
||||
void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
|
||||
bool truncated)
|
||||
void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
|
||||
{
|
||||
bool wakeup = !!(handle->aux_flags & PERF_AUX_FLAG_TRUNCATED);
|
||||
struct ring_buffer *rb = handle->rb;
|
||||
bool wakeup = truncated;
|
||||
unsigned long aux_head;
|
||||
u64 flags = 0;
|
||||
|
||||
if (truncated)
|
||||
flags |= PERF_AUX_FLAG_TRUNCATED;
|
||||
|
||||
/* in overwrite mode, driver provides aux_head via handle */
|
||||
if (rb->aux_overwrite) {
|
||||
flags |= PERF_AUX_FLAG_OVERWRITE;
|
||||
handle->aux_flags |= PERF_AUX_FLAG_OVERWRITE;
|
||||
|
||||
aux_head = handle->head;
|
||||
local_set(&rb->aux_head, aux_head);
|
||||
} else {
|
||||
handle->aux_flags &= ~PERF_AUX_FLAG_OVERWRITE;
|
||||
|
||||
aux_head = local_read(&rb->aux_head);
|
||||
local_add(size, &rb->aux_head);
|
||||
}
|
||||
|
||||
if (size || flags) {
|
||||
if (size || handle->aux_flags) {
|
||||
/*
|
||||
* Only send RECORD_AUX if we have something useful to communicate
|
||||
*/
|
||||
|
||||
perf_event_aux_event(handle->event, aux_head, size, flags);
|
||||
perf_event_aux_event(handle->event, aux_head, size,
|
||||
handle->aux_flags);
|
||||
}
|
||||
|
||||
aux_head = rb->user_page->aux_head = local_read(&rb->aux_head);
|
||||
@@ -446,7 +458,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
|
||||
}
|
||||
|
||||
if (wakeup) {
|
||||
if (truncated)
|
||||
if (handle->aux_flags & PERF_AUX_FLAG_TRUNCATED)
|
||||
handle->event->pending_disable = 1;
|
||||
perf_output_wakeup(handle);
|
||||
}
|
||||
|
@@ -2352,6 +2352,8 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
|
||||
}
|
||||
}
|
||||
|
||||
perf_event_namespaces(current);
|
||||
|
||||
bad_unshare_cleanup_cred:
|
||||
if (new_cred)
|
||||
put_cred(new_cred);
|
||||
|
@@ -1391,21 +1391,19 @@ bool within_kprobe_blacklist(unsigned long addr)
|
||||
* This returns encoded errors if it fails to look up symbol or invalid
|
||||
* combination of parameters.
|
||||
*/
|
||||
static kprobe_opcode_t *kprobe_addr(struct kprobe *p)
|
||||
static kprobe_opcode_t *_kprobe_addr(kprobe_opcode_t *addr,
|
||||
const char *symbol_name, unsigned int offset)
|
||||
{
|
||||
kprobe_opcode_t *addr = p->addr;
|
||||
|
||||
if ((p->symbol_name && p->addr) ||
|
||||
(!p->symbol_name && !p->addr))
|
||||
if ((symbol_name && addr) || (!symbol_name && !addr))
|
||||
goto invalid;
|
||||
|
||||
if (p->symbol_name) {
|
||||
kprobe_lookup_name(p->symbol_name, addr);
|
||||
if (symbol_name) {
|
||||
kprobe_lookup_name(symbol_name, addr);
|
||||
if (!addr)
|
||||
return ERR_PTR(-ENOENT);
|
||||
}
|
||||
|
||||
addr = (kprobe_opcode_t *)(((char *)addr) + p->offset);
|
||||
addr = (kprobe_opcode_t *)(((char *)addr) + offset);
|
||||
if (addr)
|
||||
return addr;
|
||||
|
||||
@@ -1413,6 +1411,11 @@ invalid:
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
static kprobe_opcode_t *kprobe_addr(struct kprobe *p)
|
||||
{
|
||||
return _kprobe_addr(p->addr, p->symbol_name, p->offset);
|
||||
}
|
||||
|
||||
/* Check passed kprobe is valid and return kprobe in kprobe_table. */
|
||||
static struct kprobe *__get_valid_kprobe(struct kprobe *p)
|
||||
{
|
||||
@@ -1740,11 +1743,12 @@ void unregister_kprobes(struct kprobe **kps, int num)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(unregister_kprobes);
|
||||
|
||||
int __weak __kprobes kprobe_exceptions_notify(struct notifier_block *self,
|
||||
unsigned long val, void *data)
|
||||
int __weak kprobe_exceptions_notify(struct notifier_block *self,
|
||||
unsigned long val, void *data)
|
||||
{
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
NOKPROBE_SYMBOL(kprobe_exceptions_notify);
|
||||
|
||||
static struct notifier_block kprobe_exceptions_nb = {
|
||||
.notifier_call = kprobe_exceptions_notify,
|
||||
@@ -1875,6 +1879,25 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
|
||||
}
|
||||
NOKPROBE_SYMBOL(pre_handler_kretprobe);
|
||||
|
||||
/*
 * Default (weak) policy for deciding whether @offset still counts as the
 * entry of a function: only offset zero qualifies.  Being weak, this
 * definition can be replaced by a non-weak one elsewhere.
 */
bool __weak arch_function_offset_within_entry(unsigned long offset)
{
	return offset == 0;
}
|
||||
|
||||
/*
 * Check whether the location given by @addr, or by @sym plus @offset, lies
 * at a function entry as judged by arch_function_offset_within_entry().
 *
 * Returns false when the location cannot be resolved by _kprobe_addr(),
 * when kallsyms has no size/offset information for the resolved address,
 * or when the offset of that address within its symbol is not accepted
 * as an entry offset; true otherwise.
 */
bool function_offset_within_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset)
{
	kprobe_opcode_t *probe_addr = _kprobe_addr(addr, sym, offset);

	if (IS_ERR(probe_addr))
		return false;

	/*
	 * From here on @offset is reused as an output: it receives the
	 * offset of the resolved address within its containing symbol.
	 */
	if (!kallsyms_lookup_size_offset((unsigned long)probe_addr, NULL, &offset))
		return false;

	return arch_function_offset_within_entry(offset);
}
|
||||
|
||||
int register_kretprobe(struct kretprobe *rp)
|
||||
{
|
||||
int ret = 0;
|
||||
@@ -1882,6 +1905,9 @@ int register_kretprobe(struct kretprobe *rp)
|
||||
int i;
|
||||
void *addr;
|
||||
|
||||
if (!function_offset_within_entry(rp->kp.addr, rp->kp.symbol_name, rp->kp.offset))
|
||||
return -EINVAL;
|
||||
|
||||
if (kretprobe_blacklist_size) {
|
||||
addr = kprobe_addr(&rp->kp);
|
||||
if (IS_ERR(addr))
|
||||
|
@@ -26,6 +26,7 @@
|
||||
#include <linux/file.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/cgroup.h>
|
||||
#include <linux/perf_event.h>
|
||||
|
||||
static struct kmem_cache *nsproxy_cachep;
|
||||
|
||||
@@ -262,6 +263,8 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype)
|
||||
goto out;
|
||||
}
|
||||
switch_task_namespaces(tsk, new_nsproxy);
|
||||
|
||||
perf_event_namespaces(tsk);
|
||||
out:
|
||||
fput(file);
|
||||
return err;
|
||||
|
@@ -455,7 +455,7 @@ config UPROBE_EVENTS
|
||||
select UPROBES
|
||||
select PROBE_EVENTS
|
||||
select TRACING
|
||||
default n
|
||||
default y
|
||||
help
|
||||
This allows the user to add tracing events on top of userspace
|
||||
dynamic events (similar to tracepoints) on the fly via the trace
|
||||
|
@@ -4355,6 +4355,7 @@ static const char readme_msg[] =
|
||||
"\t -:[<group>/]<event>\n"
|
||||
#ifdef CONFIG_KPROBE_EVENTS
|
||||
"\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
|
||||
"place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
|
||||
#endif
|
||||
#ifdef CONFIG_UPROBE_EVENTS
|
||||
"\t place: <path>:<offset>\n"
|
||||
|
@@ -681,10 +681,6 @@ static int create_trace_kprobe(int argc, char **argv)
|
||||
return -EINVAL;
|
||||
}
|
||||
if (isdigit(argv[1][0])) {
|
||||
if (is_return) {
|
||||
pr_info("Return probe point must be a symbol.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
/* an address specified */
|
||||
ret = kstrtoul(&argv[1][0], 0, (unsigned long *)&addr);
|
||||
if (ret) {
|
||||
@@ -700,8 +696,9 @@ static int create_trace_kprobe(int argc, char **argv)
|
||||
pr_info("Failed to parse symbol.\n");
|
||||
return ret;
|
||||
}
|
||||
if (offset && is_return) {
|
||||
pr_info("Return probe must be used without offset.\n");
|
||||
if (offset && is_return &&
|
||||
!function_offset_within_entry(NULL, symbol, offset)) {
|
||||
pr_info("Given offset is not valid for return probe.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user