tracing, perf: Implement BPF programs attached to kprobes

BPF programs, attached to kprobes, provide a safe way to execute
user-defined BPF byte-code programs without being able to crash or
hang the kernel in any way. The BPF engine makes sure that such
programs have a finite execution time and that they cannot break
out of their sandbox.

The user interface is to attach to a kprobe via the perf syscall:

	struct perf_event_attr attr = {
		.type	= PERF_TYPE_TRACEPOINT,
		.config	= event_id,
		...
	};

	event_fd = perf_event_open(&attr,...);
	ioctl(event_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);

'prog_fd' is a file descriptor associated with BPF program
previously loaded.

'event_id' is an ID of the kprobe created.

Closing 'event_fd':

	close(event_fd);

... automatically detaches BPF program from it.

BPF programs can call in-kernel helper functions to:

  - lookup/update/delete elements in maps

  - probe_read - wraper of probe_kernel_read() used to access any
    kernel data structures

BPF programs receive 'struct pt_regs *' as an input ('struct pt_regs' is
architecture dependent) and return 0 to ignore the event and 1 to store
kprobe event into the ring buffer.

Note, kprobes are a fundamentally _not_ a stable kernel ABI,
so BPF programs attached to kprobes must be recompiled for
every kernel version and user must supply correct LINUX_VERSION_CODE
in attr.kern_version during bpf_prog_load() call.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David S. Miller <davem@davemloft.net>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1427312966-8434-4-git-send-email-ast@plumgrid.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Alexei Starovoitov
2015-03-25 12:49:20 -07:00
committed by Ingo Molnar
parent 72cbbc8994
commit 2541517c32
8 changed files with 219 additions and 1 deletions

View File

@@ -42,6 +42,8 @@
#include <linux/module.h>
#include <linux/mman.h>
#include <linux/compat.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include "internal.h"
@@ -3407,6 +3409,7 @@ errout:
}
static void perf_event_free_filter(struct perf_event *event);
static void perf_event_free_bpf_prog(struct perf_event *event);
static void free_event_rcu(struct rcu_head *head)
{
@@ -3416,6 +3419,7 @@ static void free_event_rcu(struct rcu_head *head)
if (event->ns)
put_pid_ns(event->ns);
perf_event_free_filter(event);
perf_event_free_bpf_prog(event);
kfree(event);
}
@@ -3928,6 +3932,7 @@ static inline int perf_fget_light(int fd, struct fd *p)
static int perf_event_set_output(struct perf_event *event,
struct perf_event *output_event);
static int perf_event_set_filter(struct perf_event *event, void __user *arg);
static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd);
static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg)
{
@@ -3981,6 +3986,9 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon
case PERF_EVENT_IOC_SET_FILTER:
return perf_event_set_filter(event, (void __user *)arg);
case PERF_EVENT_IOC_SET_BPF:
return perf_event_set_bpf_prog(event, arg);
default:
return -ENOTTY;
}
@@ -6455,6 +6463,49 @@ static void perf_event_free_filter(struct perf_event *event)
ftrace_profile_free_filter(event);
}
static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
{
struct bpf_prog *prog;
if (event->attr.type != PERF_TYPE_TRACEPOINT)
return -EINVAL;
if (event->tp_event->prog)
return -EEXIST;
if (!(event->tp_event->flags & TRACE_EVENT_FL_KPROBE))
/* bpf programs can only be attached to kprobes */
return -EINVAL;
prog = bpf_prog_get(prog_fd);
if (IS_ERR(prog))
return PTR_ERR(prog);
if (prog->aux->prog_type != BPF_PROG_TYPE_KPROBE) {
/* valid fd, but invalid bpf program type */
bpf_prog_put(prog);
return -EINVAL;
}
event->tp_event->prog = prog;
return 0;
}
static void perf_event_free_bpf_prog(struct perf_event *event)
{
struct bpf_prog *prog;
if (!event->tp_event)
return;
prog = event->tp_event->prog;
if (prog) {
event->tp_event->prog = NULL;
bpf_prog_put(prog);
}
}
#else
static inline void perf_tp_register(void)
@@ -6470,6 +6521,14 @@ static void perf_event_free_filter(struct perf_event *event)
{
}
static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
{
return -ENOENT;
}
static void perf_event_free_bpf_prog(struct perf_event *event)
{
}
#endif /* CONFIG_EVENT_TRACING */
#ifdef CONFIG_HAVE_HW_BREAKPOINT