bpf: Introduce pinnable bpf_link abstraction
Introduce bpf_link abstraction, representing an attachment of BPF program to a BPF hook point (e.g., tracepoint, perf event, etc). bpf_link encapsulates ownership of attached BPF program, reference counting of a link itself, when reference from multiple anonymous inodes, as well as ensures that release callback will be called from a process context, so that users can safely take mutex locks and sleep. Additionally, with a new abstraction it's now possible to generalize pinning of a link object in BPF FS, allowing to explicitly prevent BPF program detachment on process exit by pinning it in a BPF FS and let it open from independent other process to keep working with it. Convert two existing bpf_link-like objects (raw tracepoint and tracing BPF program attachments) into utilizing bpf_link framework, making them pinnable in BPF FS. More FD-based bpf_links will be added in follow up patches. Signed-off-by: Andrii Nakryiko <andriin@fb.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Link: https://lore.kernel.org/bpf/20200303043159.323675-2-andriin@fb.com
This commit is contained in:

committed by
Alexei Starovoitov

parent
775a2be52d
commit
70ed506c3b
@@ -2173,24 +2173,154 @@ static int bpf_obj_get(const union bpf_attr *attr)
|
||||
attr->file_flags);
|
||||
}
|
||||
|
||||
static int bpf_tracing_prog_release(struct inode *inode, struct file *filp)
|
||||
{
|
||||
struct bpf_prog *prog = filp->private_data;
|
||||
struct bpf_link {
|
||||
atomic64_t refcnt;
|
||||
const struct bpf_link_ops *ops;
|
||||
struct bpf_prog *prog;
|
||||
struct work_struct work;
|
||||
};
|
||||
|
||||
WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog));
|
||||
void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops,
|
||||
struct bpf_prog *prog)
|
||||
{
|
||||
atomic64_set(&link->refcnt, 1);
|
||||
link->ops = ops;
|
||||
link->prog = prog;
|
||||
}
|
||||
|
||||
void bpf_link_inc(struct bpf_link *link)
|
||||
{
|
||||
atomic64_inc(&link->refcnt);
|
||||
}
|
||||
|
||||
/* bpf_link_free is guaranteed to be called from process context */
|
||||
static void bpf_link_free(struct bpf_link *link)
|
||||
{
|
||||
struct bpf_prog *prog;
|
||||
|
||||
/* remember prog locally, because release below will free link memory */
|
||||
prog = link->prog;
|
||||
/* extra clean up and kfree of container link struct */
|
||||
link->ops->release(link);
|
||||
/* no more accesing of link members after this point */
|
||||
bpf_prog_put(prog);
|
||||
}
|
||||
|
||||
static void bpf_link_put_deferred(struct work_struct *work)
|
||||
{
|
||||
struct bpf_link *link = container_of(work, struct bpf_link, work);
|
||||
|
||||
bpf_link_free(link);
|
||||
}
|
||||
|
||||
/* bpf_link_put can be called from atomic context, but ensures that resources
|
||||
* are freed from process context
|
||||
*/
|
||||
void bpf_link_put(struct bpf_link *link)
|
||||
{
|
||||
if (!atomic64_dec_and_test(&link->refcnt))
|
||||
return;
|
||||
|
||||
if (in_atomic()) {
|
||||
INIT_WORK(&link->work, bpf_link_put_deferred);
|
||||
schedule_work(&link->work);
|
||||
} else {
|
||||
bpf_link_free(link);
|
||||
}
|
||||
}
|
||||
|
||||
static int bpf_link_release(struct inode *inode, struct file *filp)
|
||||
{
|
||||
struct bpf_link *link = filp->private_data;
|
||||
|
||||
bpf_link_put(link);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct file_operations bpf_tracing_prog_fops = {
|
||||
.release = bpf_tracing_prog_release,
|
||||
#ifdef CONFIG_PROC_FS
|
||||
static const struct bpf_link_ops bpf_raw_tp_lops;
|
||||
static const struct bpf_link_ops bpf_tracing_link_lops;
|
||||
static const struct bpf_link_ops bpf_xdp_link_lops;
|
||||
|
||||
static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
|
||||
{
|
||||
const struct bpf_link *link = filp->private_data;
|
||||
const struct bpf_prog *prog = link->prog;
|
||||
char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
|
||||
const char *link_type;
|
||||
|
||||
if (link->ops == &bpf_raw_tp_lops)
|
||||
link_type = "raw_tracepoint";
|
||||
else if (link->ops == &bpf_tracing_link_lops)
|
||||
link_type = "tracing";
|
||||
else
|
||||
link_type = "unknown";
|
||||
|
||||
bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
|
||||
seq_printf(m,
|
||||
"link_type:\t%s\n"
|
||||
"prog_tag:\t%s\n"
|
||||
"prog_id:\t%u\n",
|
||||
link_type,
|
||||
prog_tag,
|
||||
prog->aux->id);
|
||||
}
|
||||
#endif
|
||||
|
||||
const struct file_operations bpf_link_fops = {
|
||||
#ifdef CONFIG_PROC_FS
|
||||
.show_fdinfo = bpf_link_show_fdinfo,
|
||||
#endif
|
||||
.release = bpf_link_release,
|
||||
.read = bpf_dummy_read,
|
||||
.write = bpf_dummy_write,
|
||||
};
|
||||
|
||||
int bpf_link_new_fd(struct bpf_link *link)
|
||||
{
|
||||
return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC);
|
||||
}
|
||||
|
||||
struct bpf_link *bpf_link_get_from_fd(u32 ufd)
|
||||
{
|
||||
struct fd f = fdget(ufd);
|
||||
struct bpf_link *link;
|
||||
|
||||
if (!f.file)
|
||||
return ERR_PTR(-EBADF);
|
||||
if (f.file->f_op != &bpf_link_fops) {
|
||||
fdput(f);
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
link = f.file->private_data;
|
||||
bpf_link_inc(link);
|
||||
fdput(f);
|
||||
|
||||
return link;
|
||||
}
|
||||
|
||||
struct bpf_tracing_link {
|
||||
struct bpf_link link;
|
||||
};
|
||||
|
||||
static void bpf_tracing_link_release(struct bpf_link *link)
|
||||
{
|
||||
struct bpf_tracing_link *tr_link =
|
||||
container_of(link, struct bpf_tracing_link, link);
|
||||
|
||||
WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog));
|
||||
kfree(tr_link);
|
||||
}
|
||||
|
||||
static const struct bpf_link_ops bpf_tracing_link_lops = {
|
||||
.release = bpf_tracing_link_release,
|
||||
};
|
||||
|
||||
static int bpf_tracing_prog_attach(struct bpf_prog *prog)
|
||||
{
|
||||
int tr_fd, err;
|
||||
struct bpf_tracing_link *link;
|
||||
int link_fd, err;
|
||||
|
||||
if (prog->expected_attach_type != BPF_TRACE_FENTRY &&
|
||||
prog->expected_attach_type != BPF_TRACE_FEXIT &&
|
||||
@@ -2199,58 +2329,61 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog)
|
||||
goto out_put_prog;
|
||||
}
|
||||
|
||||
err = bpf_trampoline_link_prog(prog);
|
||||
if (err)
|
||||
goto out_put_prog;
|
||||
|
||||
tr_fd = anon_inode_getfd("bpf-tracing-prog", &bpf_tracing_prog_fops,
|
||||
prog, O_CLOEXEC);
|
||||
if (tr_fd < 0) {
|
||||
WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog));
|
||||
err = tr_fd;
|
||||
link = kzalloc(sizeof(*link), GFP_USER);
|
||||
if (!link) {
|
||||
err = -ENOMEM;
|
||||
goto out_put_prog;
|
||||
}
|
||||
return tr_fd;
|
||||
bpf_link_init(&link->link, &bpf_tracing_link_lops, prog);
|
||||
|
||||
err = bpf_trampoline_link_prog(prog);
|
||||
if (err)
|
||||
goto out_free_link;
|
||||
|
||||
link_fd = bpf_link_new_fd(&link->link);
|
||||
if (link_fd < 0) {
|
||||
WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog));
|
||||
err = link_fd;
|
||||
goto out_free_link;
|
||||
}
|
||||
return link_fd;
|
||||
|
||||
out_free_link:
|
||||
kfree(link);
|
||||
out_put_prog:
|
||||
bpf_prog_put(prog);
|
||||
return err;
|
||||
}
|
||||
|
||||
struct bpf_raw_tracepoint {
|
||||
struct bpf_raw_tp_link {
|
||||
struct bpf_link link;
|
||||
struct bpf_raw_event_map *btp;
|
||||
struct bpf_prog *prog;
|
||||
};
|
||||
|
||||
static int bpf_raw_tracepoint_release(struct inode *inode, struct file *filp)
|
||||
static void bpf_raw_tp_link_release(struct bpf_link *link)
|
||||
{
|
||||
struct bpf_raw_tracepoint *raw_tp = filp->private_data;
|
||||
struct bpf_raw_tp_link *raw_tp =
|
||||
container_of(link, struct bpf_raw_tp_link, link);
|
||||
|
||||
if (raw_tp->prog) {
|
||||
bpf_probe_unregister(raw_tp->btp, raw_tp->prog);
|
||||
bpf_prog_put(raw_tp->prog);
|
||||
}
|
||||
bpf_probe_unregister(raw_tp->btp, raw_tp->link.prog);
|
||||
bpf_put_raw_tracepoint(raw_tp->btp);
|
||||
kfree(raw_tp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct file_operations bpf_raw_tp_fops = {
|
||||
.release = bpf_raw_tracepoint_release,
|
||||
.read = bpf_dummy_read,
|
||||
.write = bpf_dummy_write,
|
||||
static const struct bpf_link_ops bpf_raw_tp_lops = {
|
||||
.release = bpf_raw_tp_link_release,
|
||||
};
|
||||
|
||||
#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd
|
||||
|
||||
static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
|
||||
{
|
||||
struct bpf_raw_tracepoint *raw_tp;
|
||||
struct bpf_raw_tp_link *raw_tp;
|
||||
struct bpf_raw_event_map *btp;
|
||||
struct bpf_prog *prog;
|
||||
const char *tp_name;
|
||||
char buf[128];
|
||||
int tp_fd, err;
|
||||
int link_fd, err;
|
||||
|
||||
if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN))
|
||||
return -EINVAL;
|
||||
@@ -2302,21 +2435,20 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
|
||||
err = -ENOMEM;
|
||||
goto out_put_btp;
|
||||
}
|
||||
bpf_link_init(&raw_tp->link, &bpf_raw_tp_lops, prog);
|
||||
raw_tp->btp = btp;
|
||||
raw_tp->prog = prog;
|
||||
|
||||
err = bpf_probe_register(raw_tp->btp, prog);
|
||||
if (err)
|
||||
goto out_free_tp;
|
||||
|
||||
tp_fd = anon_inode_getfd("bpf-raw-tracepoint", &bpf_raw_tp_fops, raw_tp,
|
||||
O_CLOEXEC);
|
||||
if (tp_fd < 0) {
|
||||
link_fd = bpf_link_new_fd(&raw_tp->link);
|
||||
if (link_fd < 0) {
|
||||
bpf_probe_unregister(raw_tp->btp, prog);
|
||||
err = tp_fd;
|
||||
err = link_fd;
|
||||
goto out_free_tp;
|
||||
}
|
||||
return tp_fd;
|
||||
return link_fd;
|
||||
|
||||
out_free_tp:
|
||||
kfree(raw_tp);
|
||||
@@ -3266,15 +3398,21 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
if (file->f_op == &bpf_raw_tp_fops) {
|
||||
struct bpf_raw_tracepoint *raw_tp = file->private_data;
|
||||
struct bpf_raw_event_map *btp = raw_tp->btp;
|
||||
if (file->f_op == &bpf_link_fops) {
|
||||
struct bpf_link *link = file->private_data;
|
||||
|
||||
err = bpf_task_fd_query_copy(attr, uattr,
|
||||
raw_tp->prog->aux->id,
|
||||
BPF_FD_TYPE_RAW_TRACEPOINT,
|
||||
btp->tp->name, 0, 0);
|
||||
goto put_file;
|
||||
if (link->ops == &bpf_raw_tp_lops) {
|
||||
struct bpf_raw_tp_link *raw_tp =
|
||||
container_of(link, struct bpf_raw_tp_link, link);
|
||||
struct bpf_raw_event_map *btp = raw_tp->btp;
|
||||
|
||||
err = bpf_task_fd_query_copy(attr, uattr,
|
||||
raw_tp->link.prog->aux->id,
|
||||
BPF_FD_TYPE_RAW_TRACEPOINT,
|
||||
btp->tp->name, 0, 0);
|
||||
goto put_file;
|
||||
}
|
||||
goto out_not_supp;
|
||||
}
|
||||
|
||||
event = perf_get_event(file);
|
||||
@@ -3294,6 +3432,7 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
|
||||
goto put_file;
|
||||
}
|
||||
|
||||
out_not_supp:
|
||||
err = -ENOTSUPP;
|
||||
put_file:
|
||||
fput(file);
|
||||
|
Reference in New Issue
Block a user