bpf: Introduce pinnable bpf_link abstraction

Introduce bpf_link abstraction, representing an attachment of BPF program to
a BPF hook point (e.g., tracepoint, perf event, etc). bpf_link encapsulates
ownership of attached BPF program, reference counting of a link itself, when
reference from multiple anonymous inodes, as well as ensures that release
callback will be called from a process context, so that users can safely take
mutex locks and sleep.

Additionally, with a new abstraction it's now possible to generalize pinning
of a link object in BPF FS, allowing to explicitly prevent BPF program
detachment on process exit by pinning it in a BPF FS and let it open from
independent other process to keep working with it.

Convert two existing bpf_link-like objects (raw tracepoint and tracing BPF
program attachments) into utilizing bpf_link framework, making them pinnable
in BPF FS. More FD-based bpf_links will be added in follow up patches.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200303043159.323675-2-andriin@fb.com
This commit is contained in:
Andrii Nakryiko
2020-03-02 20:31:57 -08:00
committed by Alexei Starovoitov
parent 775a2be52d
commit 70ed506c3b
3 changed files with 237 additions and 51 deletions

View File

@@ -2173,24 +2173,154 @@ static int bpf_obj_get(const union bpf_attr *attr)
attr->file_flags);
}
static int bpf_tracing_prog_release(struct inode *inode, struct file *filp)
{
struct bpf_prog *prog = filp->private_data;
struct bpf_link {
atomic64_t refcnt;
const struct bpf_link_ops *ops;
struct bpf_prog *prog;
struct work_struct work;
};
WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog));
void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops,
struct bpf_prog *prog)
{
atomic64_set(&link->refcnt, 1);
link->ops = ops;
link->prog = prog;
}
void bpf_link_inc(struct bpf_link *link)
{
atomic64_inc(&link->refcnt);
}
/* bpf_link_free is guaranteed to be called from process context */
static void bpf_link_free(struct bpf_link *link)
{
struct bpf_prog *prog;
/* remember prog locally, because release below will free link memory */
prog = link->prog;
/* extra clean up and kfree of container link struct */
link->ops->release(link);
/* no more accesing of link members after this point */
bpf_prog_put(prog);
}
static void bpf_link_put_deferred(struct work_struct *work)
{
struct bpf_link *link = container_of(work, struct bpf_link, work);
bpf_link_free(link);
}
/* bpf_link_put can be called from atomic context, but ensures that resources
* are freed from process context
*/
void bpf_link_put(struct bpf_link *link)
{
if (!atomic64_dec_and_test(&link->refcnt))
return;
if (in_atomic()) {
INIT_WORK(&link->work, bpf_link_put_deferred);
schedule_work(&link->work);
} else {
bpf_link_free(link);
}
}
static int bpf_link_release(struct inode *inode, struct file *filp)
{
struct bpf_link *link = filp->private_data;
bpf_link_put(link);
return 0;
}
static const struct file_operations bpf_tracing_prog_fops = {
.release = bpf_tracing_prog_release,
#ifdef CONFIG_PROC_FS
static const struct bpf_link_ops bpf_raw_tp_lops;
static const struct bpf_link_ops bpf_tracing_link_lops;
static const struct bpf_link_ops bpf_xdp_link_lops;
static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
{
const struct bpf_link *link = filp->private_data;
const struct bpf_prog *prog = link->prog;
char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
const char *link_type;
if (link->ops == &bpf_raw_tp_lops)
link_type = "raw_tracepoint";
else if (link->ops == &bpf_tracing_link_lops)
link_type = "tracing";
else
link_type = "unknown";
bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
seq_printf(m,
"link_type:\t%s\n"
"prog_tag:\t%s\n"
"prog_id:\t%u\n",
link_type,
prog_tag,
prog->aux->id);
}
#endif
const struct file_operations bpf_link_fops = {
#ifdef CONFIG_PROC_FS
.show_fdinfo = bpf_link_show_fdinfo,
#endif
.release = bpf_link_release,
.read = bpf_dummy_read,
.write = bpf_dummy_write,
};
int bpf_link_new_fd(struct bpf_link *link)
{
return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC);
}
struct bpf_link *bpf_link_get_from_fd(u32 ufd)
{
struct fd f = fdget(ufd);
struct bpf_link *link;
if (!f.file)
return ERR_PTR(-EBADF);
if (f.file->f_op != &bpf_link_fops) {
fdput(f);
return ERR_PTR(-EINVAL);
}
link = f.file->private_data;
bpf_link_inc(link);
fdput(f);
return link;
}
struct bpf_tracing_link {
struct bpf_link link;
};
static void bpf_tracing_link_release(struct bpf_link *link)
{
struct bpf_tracing_link *tr_link =
container_of(link, struct bpf_tracing_link, link);
WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog));
kfree(tr_link);
}
static const struct bpf_link_ops bpf_tracing_link_lops = {
.release = bpf_tracing_link_release,
};
static int bpf_tracing_prog_attach(struct bpf_prog *prog)
{
int tr_fd, err;
struct bpf_tracing_link *link;
int link_fd, err;
if (prog->expected_attach_type != BPF_TRACE_FENTRY &&
prog->expected_attach_type != BPF_TRACE_FEXIT &&
@@ -2199,58 +2329,61 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog)
goto out_put_prog;
}
err = bpf_trampoline_link_prog(prog);
if (err)
goto out_put_prog;
tr_fd = anon_inode_getfd("bpf-tracing-prog", &bpf_tracing_prog_fops,
prog, O_CLOEXEC);
if (tr_fd < 0) {
WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog));
err = tr_fd;
link = kzalloc(sizeof(*link), GFP_USER);
if (!link) {
err = -ENOMEM;
goto out_put_prog;
}
return tr_fd;
bpf_link_init(&link->link, &bpf_tracing_link_lops, prog);
err = bpf_trampoline_link_prog(prog);
if (err)
goto out_free_link;
link_fd = bpf_link_new_fd(&link->link);
if (link_fd < 0) {
WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog));
err = link_fd;
goto out_free_link;
}
return link_fd;
out_free_link:
kfree(link);
out_put_prog:
bpf_prog_put(prog);
return err;
}
struct bpf_raw_tracepoint {
struct bpf_raw_tp_link {
struct bpf_link link;
struct bpf_raw_event_map *btp;
struct bpf_prog *prog;
};
static int bpf_raw_tracepoint_release(struct inode *inode, struct file *filp)
static void bpf_raw_tp_link_release(struct bpf_link *link)
{
struct bpf_raw_tracepoint *raw_tp = filp->private_data;
struct bpf_raw_tp_link *raw_tp =
container_of(link, struct bpf_raw_tp_link, link);
if (raw_tp->prog) {
bpf_probe_unregister(raw_tp->btp, raw_tp->prog);
bpf_prog_put(raw_tp->prog);
}
bpf_probe_unregister(raw_tp->btp, raw_tp->link.prog);
bpf_put_raw_tracepoint(raw_tp->btp);
kfree(raw_tp);
return 0;
}
static const struct file_operations bpf_raw_tp_fops = {
.release = bpf_raw_tracepoint_release,
.read = bpf_dummy_read,
.write = bpf_dummy_write,
static const struct bpf_link_ops bpf_raw_tp_lops = {
.release = bpf_raw_tp_link_release,
};
#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd
static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
{
struct bpf_raw_tracepoint *raw_tp;
struct bpf_raw_tp_link *raw_tp;
struct bpf_raw_event_map *btp;
struct bpf_prog *prog;
const char *tp_name;
char buf[128];
int tp_fd, err;
int link_fd, err;
if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN))
return -EINVAL;
@@ -2302,21 +2435,20 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
err = -ENOMEM;
goto out_put_btp;
}
bpf_link_init(&raw_tp->link, &bpf_raw_tp_lops, prog);
raw_tp->btp = btp;
raw_tp->prog = prog;
err = bpf_probe_register(raw_tp->btp, prog);
if (err)
goto out_free_tp;
tp_fd = anon_inode_getfd("bpf-raw-tracepoint", &bpf_raw_tp_fops, raw_tp,
O_CLOEXEC);
if (tp_fd < 0) {
link_fd = bpf_link_new_fd(&raw_tp->link);
if (link_fd < 0) {
bpf_probe_unregister(raw_tp->btp, prog);
err = tp_fd;
err = link_fd;
goto out_free_tp;
}
return tp_fd;
return link_fd;
out_free_tp:
kfree(raw_tp);
@@ -3266,15 +3398,21 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
if (err)
goto out;
if (file->f_op == &bpf_raw_tp_fops) {
struct bpf_raw_tracepoint *raw_tp = file->private_data;
struct bpf_raw_event_map *btp = raw_tp->btp;
if (file->f_op == &bpf_link_fops) {
struct bpf_link *link = file->private_data;
err = bpf_task_fd_query_copy(attr, uattr,
raw_tp->prog->aux->id,
BPF_FD_TYPE_RAW_TRACEPOINT,
btp->tp->name, 0, 0);
goto put_file;
if (link->ops == &bpf_raw_tp_lops) {
struct bpf_raw_tp_link *raw_tp =
container_of(link, struct bpf_raw_tp_link, link);
struct bpf_raw_event_map *btp = raw_tp->btp;
err = bpf_task_fd_query_copy(attr, uattr,
raw_tp->link.prog->aux->id,
BPF_FD_TYPE_RAW_TRACEPOINT,
btp->tp->name, 0, 0);
goto put_file;
}
goto out_not_supp;
}
event = perf_get_event(file);
@@ -3294,6 +3432,7 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
goto put_file;
}
out_not_supp:
err = -ENOTSUPP;
put_file:
fput(file);