bpf: Introduce BPF_PROG_TYPE_STRUCT_OPS

This patch allows the kernel's struct ops (i.e. func ptr) to be
implemented in BPF.  The first use case in this series is the
"struct tcp_congestion_ops" which will be introduced in a
latter patch.

This patch introduces a new prog type BPF_PROG_TYPE_STRUCT_OPS.
The BPF_PROG_TYPE_STRUCT_OPS prog is verified against a particular
func ptr of a kernel struct.  The attr->attach_btf_id is the btf id
of a kernel struct.  The attr->expected_attach_type is the member
"index" of that kernel struct.  The first member of a struct starts
with member index 0.  That will avoid ambiguity when a kernel struct
has multiple func ptrs with the same func signature.

For example, a BPF_PROG_TYPE_STRUCT_OPS prog is written
to implement the "init" func ptr of the "struct tcp_congestion_ops".
The attr->attach_btf_id is the btf id of the "struct tcp_congestion_ops"
of the _running_ kernel.  The attr->expected_attach_type is 3.

The ctx of BPF_PROG_TYPE_STRUCT_OPS is an array of u64 args saved
by arch_prepare_bpf_trampoline that will be done in the next
patch when introducing BPF_MAP_TYPE_STRUCT_OPS.

"struct bpf_struct_ops" is introduced as a common interface for the kernel
struct that supports BPF_PROG_TYPE_STRUCT_OPS prog.  The supporting kernel
struct will need to implement an instance of the "struct bpf_struct_ops".

The supporting kernel struct also needs to implement a bpf_verifier_ops.
During BPF_PROG_LOAD, bpf_struct_ops_find() will find the right
bpf_verifier_ops by searching the attr->attach_btf_id.

A new "btf_struct_access" is also added to the bpf_verifier_ops such
that the supporting kernel struct can optionally provide its own specific
check on accessing the func arg (e.g. provide limited write access).

After btf_vmlinux is parsed, the new bpf_struct_ops_init() is called
to initialize some values (e.g. the btf id of the supporting kernel
struct) and it can only be done once the btf_vmlinux is available.

The R0 checks at BPF_EXIT is excluded for the BPF_PROG_TYPE_STRUCT_OPS prog
if the return type of the prog->aux->attach_func_proto is "void".

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200109003503.3855825-1-kafai@fb.com
This commit is contained in:
Martin KaFai Lau
2020-01-08 16:35:03 -08:00
committed by Alexei Starovoitov
parent 976aba002f
commit 27ae7997a6
10 changed files with 373 additions and 63 deletions

View File

@@ -2859,11 +2859,6 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
u32 btf_id;
int ret;
if (atype != BPF_READ) {
verbose(env, "only read is supported\n");
return -EACCES;
}
if (off < 0) {
verbose(env,
"R%d is ptr_%s invalid negative access: off=%d\n",
@@ -2880,17 +2875,32 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
return -EACCES;
}
ret = btf_struct_access(&env->log, t, off, size, atype, &btf_id);
if (env->ops->btf_struct_access) {
ret = env->ops->btf_struct_access(&env->log, t, off, size,
atype, &btf_id);
} else {
if (atype != BPF_READ) {
verbose(env, "only read is supported\n");
return -EACCES;
}
ret = btf_struct_access(&env->log, t, off, size, atype,
&btf_id);
}
if (ret < 0)
return ret;
if (ret == SCALAR_VALUE) {
mark_reg_unknown(env, regs, value_regno);
return 0;
if (atype == BPF_READ) {
if (ret == SCALAR_VALUE) {
mark_reg_unknown(env, regs, value_regno);
return 0;
}
mark_reg_known_zero(env, regs, value_regno);
regs[value_regno].type = PTR_TO_BTF_ID;
regs[value_regno].btf_id = btf_id;
}
mark_reg_known_zero(env, regs, value_regno);
regs[value_regno].type = PTR_TO_BTF_ID;
regs[value_regno].btf_id = btf_id;
return 0;
}
@@ -6349,8 +6359,30 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
static int check_return_code(struct bpf_verifier_env *env)
{
struct tnum enforce_attach_type_range = tnum_unknown;
const struct bpf_prog *prog = env->prog;
struct bpf_reg_state *reg;
struct tnum range = tnum_range(0, 1);
int err;
/* The struct_ops func-ptr's return type could be "void" */
if (env->prog->type == BPF_PROG_TYPE_STRUCT_OPS &&
!prog->aux->attach_func_proto->type)
return 0;
/* eBPF calling convetion is such that R0 is used
* to return the value from eBPF program.
* Make sure that it's readable at this time
* of bpf_exit, which means that program wrote
* something into it earlier
*/
err = check_reg_arg(env, BPF_REG_0, SRC_OP);
if (err)
return err;
if (is_pointer_value(env, BPF_REG_0)) {
verbose(env, "R0 leaks addr as return value\n");
return -EACCES;
}
switch (env->prog->type) {
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
@@ -8016,21 +8048,6 @@ static int do_check(struct bpf_verifier_env *env)
if (err)
return err;
/* eBPF calling convetion is such that R0 is used
* to return the value from eBPF program.
* Make sure that it's readable at this time
* of bpf_exit, which means that program wrote
* something into it earlier
*/
err = check_reg_arg(env, BPF_REG_0, SRC_OP);
if (err)
return err;
if (is_pointer_value(env, BPF_REG_0)) {
verbose(env, "R0 leaks addr as return value\n");
return -EACCES;
}
err = check_return_code(env);
if (err)
return err;
@@ -8829,12 +8846,14 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
break;
case PTR_TO_BTF_ID:
if (type == BPF_WRITE) {
if (type == BPF_READ) {
insn->code = BPF_LDX | BPF_PROBE_MEM |
BPF_SIZE((insn)->code);
env->prog->aux->num_exentries++;
} else if (env->prog->type != BPF_PROG_TYPE_STRUCT_OPS) {
verbose(env, "Writes through BTF pointers are not allowed\n");
return -EINVAL;
}
insn->code = BPF_LDX | BPF_PROBE_MEM | BPF_SIZE((insn)->code);
env->prog->aux->num_exentries++;
continue;
default:
continue;
@@ -9502,6 +9521,58 @@ static void print_verification_stats(struct bpf_verifier_env *env)
env->peak_states, env->longest_mark_read_walk);
}
static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
{
const struct btf_type *t, *func_proto;
const struct bpf_struct_ops *st_ops;
const struct btf_member *member;
struct bpf_prog *prog = env->prog;
u32 btf_id, member_idx;
const char *mname;
btf_id = prog->aux->attach_btf_id;
st_ops = bpf_struct_ops_find(btf_id);
if (!st_ops) {
verbose(env, "attach_btf_id %u is not a supported struct\n",
btf_id);
return -ENOTSUPP;
}
t = st_ops->type;
member_idx = prog->expected_attach_type;
if (member_idx >= btf_type_vlen(t)) {
verbose(env, "attach to invalid member idx %u of struct %s\n",
member_idx, st_ops->name);
return -EINVAL;
}
member = &btf_type_member(t)[member_idx];
mname = btf_name_by_offset(btf_vmlinux, member->name_off);
func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type,
NULL);
if (!func_proto) {
verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
mname, member_idx, st_ops->name);
return -EINVAL;
}
if (st_ops->check_member) {
int err = st_ops->check_member(t, member);
if (err) {
verbose(env, "attach to unsupported member %s of struct %s\n",
mname, st_ops->name);
return err;
}
}
prog->aux->attach_func_proto = func_proto;
prog->aux->attach_func_name = mname;
env->ops = st_ops->verifier_ops;
return 0;
}
static int check_attach_btf_id(struct bpf_verifier_env *env)
{
struct bpf_prog *prog = env->prog;
@@ -9517,6 +9588,9 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
long addr;
u64 key;
if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
return check_struct_ops_btf_id(env);
if (prog->type != BPF_PROG_TYPE_TRACING)
return 0;